- 不清楚可以点击
mmdet
后续陆续增加源码注释
-- mmdetection.configs
注意: `_base_` 里面的文件都是基础配置,上层配置文件引用之后可以覆盖其中的参数,最终以上层配置为准。
configs/_base_/datasets: 基础数据的配置文件
configs/_base_/models: 基础模型的配置文件
configs/_base_/schedules: 基础超参数的配置文件
configs/_base_/default_runtime.py: 基础运行时配置文件,包括:模型保存间隔、dist后端配置等
configs/others: 上层配置文件,调用 `_base_` 里面的配置,然后针对不同模型、不同情况重新封装;实际调用以这里的配置参数为准,基础配置只是通用配置。
-- mmdetection.demo
/demo/all: 主要是前向计算测试文件
-- mmdetection.mmdet
/mmdet/apis: 训练和前向计算实例化
/mmdet/core: anchor和bbox等操作的具体实现,并被包裹到registry
/mmdet/core/anchor/anchor_generator.py: anchor构建
class AnchorGenerator(object):
    """Classic anchor generator (e.g. for the RCNN family).

    Only the interface is listed in these notes; every method body is a
    placeholder.
    """

    def __init__(self, strides, ratios, scales=None, base_sizes=None,
                 scale_major=True, octave_base_scale=None,
                 scales_per_octave=None, centers=None, center_offset=0.):
        """Configure the generator.

        Args:
            strides: Anchor step on the feature map; also the ratio between
                the original image and the feature map.
            ratios: Anchor aspect ratios.
            scales: Anchor scales; plays a role similar to ``base_sizes``
                and ``strides``.
            base_sizes: Base anchor size; when None, ``strides`` is used
                instead.

        The remaining arguments are not described in these notes.
        """
        pass

    def num_base_anchors(self):
        """Number of base anchors per level, e.g. SSD300 -> [4, 6, 6, 6, 4, 4]."""
        pass

    def num_levels(self):
        """Number of feature levels, e.g. 6 for SSD300."""
        pass

    def gen_base_anchors(self):
        """Compute the base anchors: all anchors of the top-left location of
        every level (6 levels in the SSD300 example)."""
        pass

    def gen_single_level_base_anchors(self, base_size, scales, ratios,
                                      center=None):
        """Placeholder for the per-level anchor computation.

        The original note attached here reads: given the feature size and
        stride, compute the full anchor layout by shifting the single-location
        anchors from ``gen_base_anchors`` over the whole feature map, with a
        shift step of ``strides``.

        NOTE(review): that description matches a grid-shifting method rather
        than this signature (base_size, scales, ratios, center) -- confirm
        which method the note was meant for.
        """
        # The original skeleton had no body here, which is a syntax error.
        pass

    def _meshgrid(self, x, y, row_major=True):
        """Like numpy's meshgrid: obtain the xy coordinates of a 2-D feature map."""
        pass

    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):
        """Validity flags: because of padding some anchors are not valid;
        valid -> 1, invalid -> 0."""
        pass
/mmdet/core/bbox/assigners: 正负样本分配
/mmdet/core/bbox/assigners/max_iou_assigner.py: 使用最大IOU进行正负样本分配
class MaxIoUAssigner(BaseAssigner):
    # Assigns every box either -1 (initial value, left untouched), 0 (negative)
    # or a 1-based ground-truth index, driven by the IoU matrix between
    # ground-truth boxes and candidate boxes.

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        '''
        bboxes: anchors; SSD300 generates 8732 anchors
        gt_bboxes: targets, shape (N, 4)
        gt_bboxes_ignore: boxes to ignore; part of the COCO data contains very small or abnormal targets
        gt_labels: labels, shape (N, 1)
        '''
        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr) else False
        # When one image holds a very large number of targets, assigning on CPU is recommended
        if assign_on_cpu:
            device = bboxes.device
            bboxes = bboxes.cpu()
            gt_bboxes = gt_bboxes.cpu()
            if gt_bboxes_ignore is not None:
                gt_bboxes_ignore = gt_bboxes_ignore.cpu()
            if gt_labels is not None:
                gt_labels = gt_labels.cpu()

        overlaps = self.iou_calculator(gt_bboxes, bboxes)  # compute IoU on the tensors' current device (CPU if they were moved above)

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = self.iou_calculator(
                    bboxes, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = self.iou_calculator(
                    gt_bboxes_ignore, bboxes, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            # Columns (boxes) that overlap an ignored region too much get IoU -1,
            # so the thresholds in assign_wrt_overlaps never select them.
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        if assign_on_cpu:
            # Move the results back to the device the boxes originally lived on.
            assign_result.gt_inds = assign_result.gt_inds.to(device)
            assign_result.max_overlaps = assign_result.max_overlaps.to(device)
            if assign_result.labels is not None:
                assign_result.labels = assign_result.labels.to(device)
        return assign_result

    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # One image is passed in at a time, so assigned_gt_inds only needs
        # 8732*1 entries; it serves as the flag of every anchor.
        # 1. initialize to -1
        assigned_gt_inds = overlaps.new_full((num_bboxes, ), -1, dtype=torch.long)

        # No-target case; this generally does not happen
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gts == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_full((num_bboxes, ), -1, dtype=torch.long)
            return AssignResult(
                num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # shape (1*8732): max over the N targets, i.e. the best-IoU target of every location (anchor)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)  # shape (N*1): max over the 8732 boxes of every target, i.e. the best-IoU box of every target

        # 2. assign negative: below
        # the negative inds are set to be 0
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0  # negatives: IoU in [0, neg_iou_thr)
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0  # negatives: IoU in [neg_iou_thr[0], neg_iou_thr[1])

        # 3. assign positive: above positive IoU threshold
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1  # positives store the target id; +1 so that the first target (index 0) does not collide with the negative marker 0

        # Guards against different targets lying too close together (this also
        # shows up with unsuitable anchor settings and other special cases).
        # Suppose targets B and C and anchors a_f / a_g, with overlaps
        # a_f -> (B@80%, C@79%) and a_g -> (B@81%, C@80%). C never overlaps
        # either anchor more than B does, so both anchors would be treated as
        # B -- how would C then be regressed?
        # The step below cannot 100% guarantee that every target receives an
        # anchor either, but it avoids the situation above.
        if self.match_low_quality:
            # Low-quality matching will overwrite the assigned_gt_inds assigned
            # in Step 3. Thus, the assigned gt might not be the best one for
            # prediction.
            # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2,
            # bbox 1 will be assigned as the best target for bbox A in step 3.
            # However, if GT bbox 2's gt_argmax_overlaps = A, bbox A's
            # assigned_gt_inds will be overwritten to be GT bbox 2.
            # This might be the reason that it is not used in ROI Heads.
            for i in range(num_gts):
                if gt_max_overlaps[i] >= self.min_pos_iou:
                    if self.gt_max_assign_all:
                        max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                        assigned_gt_inds[max_iou_inds] = i + 1
                    else:
                        assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                # assigned_gt_inds is 1-based for positives, hence the -1.
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)  # returns an AssignResult object that wraps these values
/mmdet/core/bbox/coder: 编码anchor和output
/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py: SSD方式编码
/mmdet/core/bbox/iou_calculators: IOU具体操作在这里实现
/mmdet/core/bbox/samplers: 正负样本采样,针对样本不均衡等问题,SSD使用负样本挖掘方式
/mmdet/core/bbox/samplers/sampling_result.py: 和上面的assign_result类似,都是将数据写入一个类中,方便管理和操作
/mmdet/datasets: 数据读取处理函数
/datasets/pipelines: 数据增强具体实现和Compose
/datasets/samplers: -- distributed_sampler.py: 重写了distributed_sampler类,和torch原版一点没变,仅仅改了名字。 -- group_sampler.py:
class GroupSampler(Sampler):
    """Sampler whose output is a sequence of same-group index chunks.

    ``dataset.flag`` supplies one non-negative integer group id per sample
    (the notes say it is defined in datasets/custom.py). Each group is padded
    with randomly re-drawn members until its size is a multiple of
    ``samples_per_gpu``; the padded groups are cut into chunks of
    ``samples_per_gpu`` indices and the chunks are emitted in random order.

    Args:
        dataset: Object exposing a ``flag`` array.
        samples_per_gpu: Chunk size, i.e. how many consecutive indices share
            one group.
    """

    def __init__(self, dataset, samples_per_gpu=3):
        assert hasattr(dataset, 'flag')
        self.dataset = dataset
        self.samples_per_gpu = samples_per_gpu
        self.flag = dataset.flag.astype(np.int64)
        self.group_sizes = np.bincount(self.flag)
        # Each group is rounded up to a whole number of chunks.
        self.num_samples = sum(
            int(np.ceil(count / self.samples_per_gpu)) * self.samples_per_gpu
            for count in self.group_sizes)

    def __iter__(self):
        chunk = self.samples_per_gpu
        per_group = []
        for group_id, count in enumerate(self.group_sizes):
            if count == 0:
                continue
            members = np.where(self.flag == group_id)[0]
            assert len(members) == count
            np.random.shuffle(members)
            # Number of extra indices needed to fill the last chunk.
            shortfall = int(np.ceil(count / chunk)) * chunk - len(members)
            # The filler is drawn at random from the group's own members.
            padding = np.random.choice(members, shortfall)
            per_group.append(np.concatenate([members, padding]))

        flat = np.concatenate(per_group)
        # Shuffle whole chunks so that every chunk stays within one group.
        chunk_order = np.random.permutation(range(len(flat) // chunk))
        shuffled = np.concatenate(
            [flat[k * chunk:(k + 1) * chunk] for k in chunk_order])
        result = shuffled.astype(np.int64).tolist()
        assert len(result) == self.num_samples
        return iter(result)
/torch/utils/data/dataset:
class ConcatDataset(Dataset):
    """Dataset that presents several datasets as one, laid end to end.

    Args:
        datasets: Iterable of datasets (anything supporting ``len`` and
            integer indexing).
    """

    @staticmethod
    def cumsum(sequence):
        """Return the running totals of ``len(item)`` for each item."""
        totals, running = [], 0
        for item in sequence:
            running += len(item)
            totals.append(running)
        return totals

    def __init__(self, datasets):
        super().__init__()
        # Must be stored before cumsum() reads it; the excerpt omitted this.
        self.datasets = list(datasets)
        # Running length totals: [len_1, len_1 + len_2, len_1 + len_2 + len_3]
        self.cumulative_sizes = self.cumsum(self.datasets)

    def __len__(self):
        # The last running total is the overall length.
        return self.cumulative_sizes[-1]

    def __getitem__(self, idx):
        """Return the sample at global position ``idx``.

        Raises:
            ValueError: If ``idx`` is negative and its absolute value exceeds
                the dataset length.
        """
        # Negative indices count from the end.
        if idx < 0:
            if -idx > len(self):
                raise ValueError("absolute value of index should not exceed dataset length")
            idx = len(self) + idx
        # Binary search for the sub-dataset that contains idx.
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        # Fetch the given position from the given sub-dataset.
        return self.datasets[dataset_idx][sample_idx]

    @property
    def cummulative_sizes(self):
        """Deprecated misspelled alias of ``cumulative_sizes``."""
        warnings.warn("cummulative_sizes attribute is renamed to "
                      "cumulative_sizes", DeprecationWarning, stacklevel=2)
        return self.cumulative_sizes
/datasets/builder: 实例化数据相关任务:sample、dataloader、dataset
/datasets/dataset_wrappers.py: 重写ConcatDataset(上面已经详细说明)、RepeatDataset,并增加数据类别平衡类(具体没看)
/datasets/custom.py:
@DATASETS.register_module()
class CustomDataset(Dataset):
    """Interface skeleton of the custom dataset; method bodies are omitted."""

    # Class names. They can be defined right here (the common choice) or
    # passed in from outside.
    CLASSES = None

    # Load all annotations. The format is as follows:
    #
    #     {
    #         'filename': 'a.jpg',
    #         'width': 1280,
    #         'height': 720,
    #         'ann': {
    #             'bboxes': (n, 4),
    #             'labels': (n, ),
    #             'bboxes_ignore': (k, 4), (optional field)
    #             'labels_ignore': (k, 4) (optional field)
    #         }
    #     },
    #
    # NOTE(review): 'labels_ignore' is listed as (k, 4) in the original note;
    # (k, ) would mirror 'labels' -- confirm.
    def load_annotations(self, ann_file):
        pass

    # Purpose not determined yet.
    def load_proposals(self, proposal_file):
        pass

    # Filter out data that does not meet the requirements.
    def _filter_imgs(self, min_size=32):
        pass

    # Fetch a single training sample.
    def prepare_train_img(self, idx):
        pass

    # Fetch a single test sample.
    def prepare_test_img(self, idx):
        # The original skeleton had no body here, which is a syntax error.
        pass

    # Get the annotation info of a single image.
    def get_ann_info(self, idx):
        pass

    # Pick data at random; relies on the flags from _set_group_flag.
    def _rand_another(self, idx):
        pass

    # Group images by a given criterion (the original uses the aspect ratio).
    def _set_group_flag(self):
        pass
整个数据读取流程比较清晰:
```mermaid
graph TD
    A_1[准备特定格式label] --> A_2
    A_2[读取全部label] --> A_3(过滤不合适label)
    A_3 --> C{train/test}
    C -->|train| D[读取图像信息+label信息]
    C -->|test| E[和train类似]
    D --> D_1{合适/不合适}
    D_1 -->|不合适| D_2(随机选取)
    D_1 -->|合适| D_3(直接选取)
```
/mmdet/models: 模型实际实现函数
/mmdet/ops: 需要快速实现的操作,如:NMS、ROIPooling、ROIAlign....
/mmdet/utils: 一些辅助操作,环境变量和版本等
-- mmdetection.tests
/tests/all: 测试脚本,可以用来查看原理和测试
-- mmdetection.tools
/tools/all: 杂七杂八文件,包括:训练+测试(仅是入口,实际操作在apis之内)、数据转换、计算MAC、转换模型ONNX.....
/tools/train.py: 单机单卡
/tools/dist_train.sh: 单机多卡,使用distributed
/tools/slurm_train.sh: 多机多卡

大致流程:
- 准备数据集,在mmdet/datasets
- 准备模型,在mmdet/models, loss函数在models里面实现
- 准备特殊函数,在/mmdet/core,一些mmdet没有的操作
- 配置参数,在/configs, 基础配置可选,后面的参数必须配置
- 训练模型,在/tools, 调用评估可在配置里设置
- 前向推理,在/demo
Already open...
...