提取码:x2z3
训练集和测试集划分
问题一: 测试集不包括GroundTruth
下载下来后,有CT/MR两种数据,都是dcm格式,每一张就是一个slice. 对于核磁共振图像,一共有40个病例,训练集和测试集各分了20例。比较坑的是,测试集并没有给出GroundTruth的图像,可能是比赛需要吧. 因而我们只能将使用一半的病例,并将其重新划分成训练和测试集。
我使用了16个病例做训练集,4个病例做测试集.
问题二:T1和T2的数据能一起训练吗?
据我的实验结果来看,仅仅用Unet分割是不能一起训练的,最终的结果会导致严重的过拟合;T1,T2图像可能在模态上还是有比较大的区别.
我对医学不懂,查阅资料:
- T1加权成像(T1WI)是指突出组织T纵向弛豫差别。t1越短,指信号越强,t1越长,指信号越弱,t1一般用于观察解剖。
- T2加权成像(T2WI)是指突出组织T2横向弛豫差别。t2越短,是指信号越弱,t2越长,则信号越强,一般t2有利于观察病变,对出血较敏感。
最终我选择使用T1的图像训练.
问题三: T1的图像在文件夹中分成了InPhase和OutPhase,这个有什么区别?
InPhase和Outphase应该是T1模态图像的相位相反的两种呈现,我只选用了InPhase进行了训练,因为它跟GroundTruth的命名是一样的,处理起来也方便一点.
下面是分离数据集的代码:
"""
@ Date: 2020/6/29
@ Author: Xiao Zhuo
@ Brief: Split CHAOS DataSet into my directory
@ Filename: split_dataset_1.py
"""
# -*- coding: utf-8 -*-
import os
import shutil
import random
# Destination folders for the re-split dataset (16 train / 4 val patients):
# "Data" holds the DICOM slices, "Ground" the segmentation masks.
dst_TrainData = "./data/train/Data"
dst_TrainGround = "./data/train/Ground"
dst_TestData = "./data/val/Data"
dst_TestGround = "./data/val/Ground"
def collect_T1_name(patient_dir):
    """Collect ground-truth and InPhase slice paths for one patient.

    Args:
        patient_dir: folder of one patient, containing a "T1DUAL" subfolder.

    Returns:
        (ground_paths, inphase_paths): full paths of the mask PNGs and the
        InPhase DICOM slices. Both lists are sorted by file name so that
        index j in one list corresponds to index j in the other —
        os.listdir order is arbitrary and would otherwise break the pairing.
    """
    t1_datadir = os.path.join(patient_dir, "T1DUAL")

    ground_dir = os.path.join(t1_datadir, "Ground")
    ground_paths = [os.path.join(ground_dir, name)
                    for name in sorted(os.listdir(ground_dir))]

    inphase_dir = os.path.join(t1_datadir, "DICOM_anon", "InPhase")
    inphase_paths = [os.path.join(inphase_dir, name)
                     for name in sorted(os.listdir(inphase_dir))]

    return ground_paths, inphase_paths
if __name__ == '__main__':
    dataset_dir = os.path.join("CHAOS_Train_Sets", "Train_Sets", "MR")
    train_pct = 0.8  # 16 of 20 patients go to the training split

    # Use os.listdir on the top level only. os.walk would keep descending
    # into every patient folder and call collect_T1_name on directories
    # that have no T1DUAL child, which raises FileNotFoundError.
    patients = [d for d in os.listdir(dataset_dir)
                if os.path.isdir(os.path.join(dataset_dir, d))]
    random.shuffle(patients)  # random train/val assignment of patients
    train_point = int(len(patients) * train_pct)

    for i, sub_dir in enumerate(patients):  # sub_dir is the patient id
        if i < train_point:
            dst_ground, dst_data = dst_TrainGround, dst_TrainData
        else:
            dst_ground, dst_data = dst_TestGround, dst_TestData
        ground_paths, inphase_paths = collect_T1_name(
            os.path.join(dataset_dir, sub_dir))
        # Rename slices to T1_Patient<id>_No<j> so files from different
        # patients cannot collide in the shared output folders.
        for num, src in enumerate(ground_paths):
            shutil.copy(src, os.path.join(
                dst_ground, "T1_Patient%s_No%d.png" % (sub_dir, num)))
        for num, src in enumerate(inphase_paths):
            shutil.copy(src, os.path.join(
                dst_data, "T1_Patient%s_No%d.dcm" % (sub_dir, num)))
该段程序的作用就是将训练集中T1/InPhase 的20个病例划分成16个训练集,4个测试集,并重新存储到自定义的文件夹下. 对于图像文件也进行了命名规范,对第i个病人的第j张slice,命名规则为T1_Patienti_Noj.dcm
分离出肝脏
GroundTruth的图像是多器官的,根据灰度范围进行判断。
从自带的config文件中可以查看灰度范围:
因为GroundTruth是png格式,我们使用OpenCV做一下简单的阈值处理就可以提取肝脏部分了.
"""
@ Date: 2020/6/29
@ Author: Xiao Zhuo
@ Brief: Extract liver part from GroundTruth and set white color
@ Filename: extract_only_liver_2.py
"""
# -*- coding: utf-8 -*-
import os
import cv2
def makedir(dir):
    """Create directory `dir` (including missing parents) if absent.

    os.makedirs with exist_ok=True is race-free and, unlike the previous
    exists()+mkdir pair, also works when intermediate folders are missing.
    """
    os.makedirs(dir, exist_ok=True)
def extract_liver(dataset_dir):
    """Extract the liver label from the multi-organ GroundTruth masks.

    In the CHAOS T1 masks the liver is encoded as gray values 55-70 (per
    the dataset's config file). Every PNG in `dataset_dir` is thresholded
    to a binary mask (liver -> 255, everything else -> 0); only slices that
    actually contain liver pixels are written to `dataset_dir`/Liver.
    """
    # Skip the output folder itself on re-runs, wherever listdir places it
    # (the old src_names[0] check only worked when it sorted first).
    src_names = [n for n in os.listdir(dataset_dir) if n != 'Liver']
    dst_dir = os.path.join(dataset_dir, "Liver")
    makedir(dst_dir)
    for name in src_names:
        # Read as single-channel grayscale; the default 3-channel load
        # just triples the work on a label image.
        src = cv2.imread(os.path.join(dataset_dir, name), cv2.IMREAD_GRAYSCALE)
        # One vectorized pass instead of a Python triple loop:
        # 255 where 55 <= pixel <= 70, else 0.
        liver_mask = cv2.inRange(src, 55, 70)
        # Save only slices containing liver. (The original initialized its
        # flag to 1 before the loop, which saved every slice.)
        if cv2.countNonZero(liver_mask) > 0:
            cv2.imwrite(os.path.join(dst_dir, name), liver_mask)
if __name__ == '__main__':
    train_dir = os.path.join("data", "train", "Ground")
    test_dir = os.path.join("data", "val", "Ground")
    # Keep only the liver label in the ground-truth masks of both splits.
    for ground_dir in (train_dir, test_dir):
        extract_liver(ground_dir)
提取后的肝脏二值化掩膜如图所示:
将dicom文件转换为png格式
这一步实际上也可以不做,原因是dicom中的图像数据原本是16位的,若是转换成8位的png格式可能会导致数据精度丢失。使用SimpleITK直接读取Array送入U-net其实就可以运行了.
但我要多此一举的原因是,我想做数据增强. 但是现有的数据增强工具好像不能处理Array或者numpy等格式的数据,自己又没有那个水平重新写一个数据增强的API。没办法,就转换成png简单处理吧.
"""
# @file name : conver2png.py
# @author : Peter
# @date : 2020-07-01
# @brief : 将dicom格式转换成png格式
"""
import pydicom
import os
import matplotlib.pyplot as plt
from skimage import img_as_float
# Source folders holding the .dcm slices to convert to PNG.
path_1 = "./data/val/Data"
path_2 = "./data/train/Data"
def dicom_2png(orifile, savefile, width, height):
    """Render one DICOM slice as a `width` x `height` grayscale PNG.

    The display maximum is the second-largest pixel value, so a single
    outlier pixel does not compress the whole gray range; pixels above it
    are zeroed before normalization.

    Args:
        orifile: path of the source .dcm file.
        savefile: path of the PNG to write.
        width, height: output size in pixels.
    """
    dcm = pydicom.dcmread(orifile)
    pixels = dcm.pixel_array
    vmin = pixels.min()
    # Second-largest value; assumes the slice is not completely uniform.
    vmax = pixels[pixels < pixels.max()].max()
    pixels[pixels > vmax] = 0
    image = img_as_float(pixels)

    # figsize is in inches at matplotlib's default 100 dpi, so
    # width/100 x height/100 inches saves a width x height pixel image.
    fig = plt.figure('adjust_gamma', figsize=(width / 100, height / 100))
    plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0)
    plt.imshow(image, 'gray')
    plt.axis('off')
    plt.savefig(savefile)
    # Close the figure: the original leaked one figure per slice, which
    # accumulates memory over hundreds of conversions.
    plt.close(fig)
if __name__ == '__main__':
    # Convert every DICOM slice in both splits to an 8-bit PNG.
    for src_dir, dst_dir in ((path_1, './data/val/Data_8bit'),
                             (path_2, './data/train/Data_8bit')):
        # The original crashed when the output folder did not exist yet.
        os.makedirs(dst_dir, exist_ok=True)
        for name in os.listdir(src_dir):
            png_name = os.path.splitext(name)[0] + '.png'
            dicom_2png(os.path.join(src_dir, name),
                       os.path.join(dst_dir, png_name), 256, 256)
转换后一目了然,不需要再用MicroDicom去查看
数据增强
我使用Augmentor工具.
# Import the augmentation toolkit
import Augmentor
# Point the pipeline at the source images and their segmentation masks
# (ground_truth images receive the same random transforms as the images)
p = Augmentor.Pipeline("./data/train/Data")
p.ground_truth("./data/train/Ground")
# Rotation: probability 0.8, at most 10 degrees to either side
p.rotate(probability=0.8, max_left_rotation=10, max_right_rotation=10)
# Horizontal flip: probability 0.5
p.flip_left_right(probability=0.5)
# Random zoom: probability 0.3, cropping 0.85 of the original area
p.zoom_random(probability=0.3, percentage_area=0.85)
# Total number of augmented samples to generate
p.sample(400)
当然,增强的图片还可以重新命个名,按照序号来:
import os
Data_path = "./data/train/Data\_aug"
Ground_path = "./data/train/Ground\_aug"
data_names = os.listdir(Data_path)
ground_names = os.listdir(Ground_path)
for i in range(len(data_names)):
used_name = os.path.join(Data_path, data_names[i])
new_name = os.path.join(Data_path, "Aug\_No\_%d.png" % i)
os.rename(used_name, new_name)
for i in range(len(ground_names)):
used_name = os.path.join(Ground_path, ground_names[i])
new_name = os.path.join(Ground_path, "Aug\_No\_%d.png" % i)
os.rename(used_name, new_name)
网络搭建和训练部分,我使用的是Python3.7 + Pytorch 1.4.0.
U-net网络搭建
就是经典的网络结构,不过我加了尝试加了几个Dropout层.
"""
@ filename: unet.py
"""
import torch
from torch import nn
class DoubleConv(nn.Module):
    """(3x3 Conv -> BatchNorm -> ReLU) applied twice, spatial size preserved."""

    def __init__(self, in_ch, out_ch):
        super(DoubleConv, self).__init__()
        # Built as a list so the two conv stages read as one unit; the
        # nn.Sequential keeps the original parameter names (conv.0, conv.1, ...).
        layers = [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, input):
        """Run both conv stages; returns a tensor with out_ch channels."""
        return self.conv(input)
class Unet(nn.Module):
    """Classic U-Net with Dropout(p=0.2) after each pooling and each skip concat.

    Returns raw logits from a final 1x1 conv; apply sigmoid/softmax in the
    loss function or at inference time.
    """

    def __init__(self, in_ch, out_ch):
        super(Unet, self).__init__()
        # Encoder: channel count doubles at every level.
        self.conv1 = DoubleConv(in_ch, 64)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)
        self.conv4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2)
        self.conv5 = DoubleConv(512, 1024)
        # Decoder: transposed convs upsample 2x; the DoubleConvs fuse the
        # skip concatenation, hence their doubled input channel counts.
        self.up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.conv6 = DoubleConv(1024, 512)
        self.up7 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv7 = DoubleConv(512, 256)
        self.up8 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv8 = DoubleConv(256, 128)
        self.up9 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv9 = DoubleConv(128, 64)
        self.conv10 = nn.Conv2d(64, out_ch, 1)  # 1x1 conv to class logits
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # Contracting path; keep the pre-pool features for the skip links.
        e1 = self.conv1(x)
        e2 = self.conv2(self.dropout(self.pool1(e1)))
        e3 = self.conv3(self.dropout(self.pool2(e2)))
        e4 = self.conv4(self.dropout(self.pool3(e3)))
        bottom = self.conv5(self.dropout(self.pool4(e4)))
        # Expanding path: upsample, concatenate the matching encoder
        # features along the channel axis, dropout, then fuse.
        d6 = self.conv6(self.dropout(torch.cat([self.up6(bottom), e4], dim=1)))
        d7 = self.conv7(self.dropout(torch.cat([self.up7(d6), e3], dim=1)))
        d8 = self.conv8(self.dropout(torch.cat([self.up8(d7), e2], dim=1)))
        d9 = self.conv9(self.dropout(torch.cat([self.up9(d8), e1], dim=1)))
        # Raw logits (the original's sigmoid is intentionally commented out).
        return self.conv10(d9)
自定义Dataset
make_dataset方法获取原始图像和分割掩膜的图像路径名,LiverDateset类继承torch的数据集类,通过make_dataset的路径名利用PIL Image库读取文件,并进行transforms变换成归一化的Tensor数据.
"""
@ filename: dataset.py
@ author: Peter Xiao
@ Date: 2020/5/1
@ Brief: 自定义肝脏数据集
"""
from torch.utils.data import Dataset
import PIL.Image as Image
import os
def make_dataset(root):
    """Pair each image in root/Data with its mask of the same name in root/Ground.

    Args:
        root: split folder, e.g. "./data/train".

    Returns:
        list of (image_path, mask_path) tuples, sorted by file name —
        os.listdir order is arbitrary, so sorting makes the dataset order
        deterministic across runs.
    """
    ori_path = os.path.join(root, "Data")
    ground_path = os.path.join(root, "Ground")
    # Masks are assumed to carry the same file name as their image —
    # that is how the split script named both sides. TODO confirm for
    # folders produced by other tools.
    return [(os.path.join(ori_path, name), os.path.join(ground_path, name))
            for name in sorted(os.listdir(ori_path))]
class LiverDataset(Dataset):
    """Liver segmentation dataset yielding (image, mask) pairs."""

    def __init__(self, root, transform=None, target_transform=None):
        """Index the split under `root`.

        Args:
            root: split folder containing Data/ and Ground/ subfolders.
            transform: applied to the input image.
            target_transform: applied to the ground-truth mask.
        """
        # make_dataset returns (image_path, mask_path) tuples.
        self.imgs = make_dataset(root)
        self.transform = transform
        self.target_transform = target_transform
def \_\_getitem\_\_(self, index):
x_path, y_path = self.imgs[index]
img_x = Image.open(x_path).convert('L')


**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**
**一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**
def __getitem__(self, index):
    x_path, y_path = self.imgs[index]
    img_x = Image.open(x_path).convert('L')
[外链图片转存中...(img-kpe7IR5h-1714774535776)]
[外链图片转存中...(img-xtp75ss7-1714774535777)]
**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**[需要这份系统化资料的朋友,可以戳这里获取](https://bbs.csdn.net/topics/618545628)**
**一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**