用比较简单的方式调用 MMSegmentation
一、利用命令生成config文件
利用 tools/train.py
生成config文件。
python tools/train.py configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py
其中,config文件名参考configs文件夹下的文件。选取自己喜欢的就好,如:
configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py
最后可以在 work_dirs 下对应的子目录里得到想要的、包含所有配置信息的完整配置文件,例如 deeplabv3_r50-d8_512x512_20k_voc12aug.py。
该文件包含了所有需要配置的信息,其内容形如下面的示例(注意:下面这份示例导出自 Cityscapes 的 deeplabv3 配置,可从其中的 dataset_type 和 work_dir 看出):
# Fully expanded training config: every value is written out inline.
# Dataset: CityscapesDataset, 19 classes, 512x1024 crops, 40000 iterations.

# --- normalization and input preprocessing ---
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(512, 1024))

# --- model: ResNetV1c backbone, ASPP decode head, FCN auxiliary head ---
model = dict(
    type='EncoderDecoder',
    data_preprocessor=dict(
        type='SegDataPreProcessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_val=0,
        seg_pad_val=255,
        size=(512, 1024)),
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# --- dataset and data pipelines ---
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 1024),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]

# --- test-time augmentation: one Resize per ratio, with and without flip ---
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=0.5, keep_ratio=True),
                dict(type='Resize', scale_factor=0.75, keep_ratio=True),
                dict(type='Resize', scale_factor=1.0, keep_ratio=True),
                dict(type='Resize', scale_factor=1.25, keep_ratio=True),
                dict(type='Resize', scale_factor=1.5, keep_ratio=True),
                dict(type='Resize', scale_factor=1.75, keep_ratio=True),
            ],
            [
                dict(type='RandomFlip', prob=0.0, direction='horizontal'),
                dict(type='RandomFlip', prob=1.0, direction='horizontal'),
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ])
]

# --- dataloaders (val and test both read the 'val' split) ---
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type='CityscapesDataset',
        data_root='data/cityscapes/',
        data_prefix=dict(
            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(
                type='RandomResize',
                scale=(2048, 1024),
                ratio_range=(0.5, 2.0),
                keep_ratio=True),
            dict(
                type='RandomCrop',
                crop_size=(512, 1024),
                cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackSegInputs')
        ]))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CityscapesDataset',
        data_root='data/cityscapes/',
        data_prefix=dict(
            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs')
        ]))
test_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CityscapesDataset',
        data_root='data/cityscapes/',
        data_prefix=dict(
            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs')
        ]))
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])

# --- runtime ---
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
load_from = None
resume = False
tta_model = dict(type='SegTTAModel')

# --- optimizer and iteration-based schedule ---
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
    clip_grad=None)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=0.0001,
        power=0.9,
        begin=0,
        end=40000,
        by_epoch=False)
]
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
launcher = 'none'
work_dir = './work_dirs/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024'
需要修改的内容主要如下:
# Dataset class name and dataset root (the masked part is your own path).
dataset_type = 'StanfordBackgroundDataset'
data_root = '/***/dataset'

# Image / mask folders for the training and validation splits; the
# dataloaders pass them as data_prefix together with data_root.
img_dir = 'image'
ann_dir = 'mask'
img_dir_val = 'val/image'
ann_dir_val = 'val/mask'

# Crop size used by the preprocessor and the pipelines.
crop_size = (512, 512)

# Normalization layer config shared by the backbone and both heads.
norm_cfg = dict(type='BN', requires_grad=True)
修改后信息如下:其中主要是数据路径和图片大小(此外 num_classes 由 19 改为 2,norm_cfg 由 SyncBN 改为 BN,work_dir 也相应修改)
# Customized config: ResNetV1c-50 backbone + ASPP head on a 2-class dataset.
# Shared settings are defined once and referenced by name everywhere else,
# so changing e.g. crop_size or a pipeline only has to be done in one place.

# --- dataset location ---
dataset_type = 'StanfordBackgroundDataset'
data_root = '/cluster/***/dataset'
img_dir = 'image'
ann_dir = 'mask'
img_dir_val = 'val/image'
ann_dir_val = 'val/mask'

# --- shared settings ---
crop_size = (512, 512)
norm_cfg = dict(type='BN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=crop_size)

# --- model ---
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# --- data pipelines ---
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=crop_size,
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=crop_size, keep_ratio=True),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]

# --- test-time augmentation: one Resize per ratio, with and without flip ---
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=p, direction='horizontal')
                for p in (0.0, 1.0)
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ])
]

# --- dataloaders ---
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path=img_dir, seg_map_path=ann_dir),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path=img_dir_val, seg_map_path=ann_dir_val),
        pipeline=test_pipeline))
# Testing reuses the validation split and settings.
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

# --- runtime ---
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
load_from = None
resume = False
tta_model = dict(type='SegTTAModel')

# --- optimizer and iteration-based schedule ---
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=0.0001,
        power=0.9,
        begin=0,
        end=40000,
        by_epoch=False)
]
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
launcher = 'none'
work_dir = './work_dirs/deeplabv3_r50'
二、修改好配置文件后,需要注册自定义的数据集类
1、在 mmseg/datasets 中新建文件,如 example.py,其内容如下(classes 与 palette 填写自己数据的类别名和自定义颜色):
from mmseg.registry import DATASETS
from .basesegdataset import BaseSegDataset
@DATASETS.register_module()
class StanfordBackgroundDataset(BaseSegDataset):
    """Custom two-class segmentation dataset registered in ``DATASETS``.

    ``METAINFO`` holds the class names and one colour triple per class;
    replace both with the names and colours of your own data.

    Args:
        img_suffix (str): Forwarded to ``BaseSegDataset`` as ``img_suffix``.
            Defaults to '.png'.
        seg_map_suffix (str): Forwarded to ``BaseSegDataset`` as
            ``seg_map_suffix``. Defaults to '.png'.
        **kwargs: Remaining keyword arguments, passed through unchanged to
            ``BaseSegDataset.__init__``.
    """

    # NOTE(review): the order of ``classes`` presumably has to match the
    # label ids stored in the mask images -- confirm against your data.
    METAINFO = dict(
        classes=('foreground', 'background'),
        palette=[[123, 43, 15], [32, 222, 113]])

    def __init__(self, img_suffix='.png', seg_map_suffix='.png', **kwargs):
        # The suffixes are real keyword parameters (not hard-coded next to
        # **kwargs) so a config that sets ``img_suffix``/``seg_map_suffix``
        # overrides the default instead of raising a duplicate-keyword
        # TypeError.
        super().__init__(
            img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs)
2、然后在 mmseg/datasets 中的 __init__.py 中进行导入,并把类名加入 __all__,如:
# Import the new dataset class so that it is registered when the package loads.
from .example import StanfordBackgroundDataset
# NOTE: '...' appears to be a placeholder for the names already listed in
# __all__; keep those entries and append 'StanfordBackgroundDataset' rather
# than pasting this line verbatim.
__all__ = ['...','StanfordBackgroundDataset']
三、运行
python tools/train.py path/to/deeplabv3_r50-d8_512x512_20k_voc12aug.py