Refine docs and fix format issues (#3349)

* Refine docs and fix format issues

* reformat

* fix mock

* Refine format

* Reorganize tutorials
pull/3360/head
Wenwei Zhang committed 5 years ago via GitHub
parent adc65ffc3e
commit ec7d83bb4c
30 changed files (changed line counts in parentheses):
1. .dev_scripts/linter.sh (2)
2. .github/CONTRIBUTING.md (2)
3. .style.yapf (4)
4. docs/conf.py (5)
5. docs/index.rst (14)
6. docs/tutorials/index.rst (7)
7. mmdet/core/anchor/anchor_generator.py (29)
8. mmdet/core/anchor/utils.py (2)
9. mmdet/core/bbox/assigners/center_region_assigner.py (35)
10. mmdet/core/bbox/samplers/sampling_result.py (4)
11. mmdet/core/mask/structures.py (6)
12. mmdet/core/mask/utils.py (4)
13. mmdet/core/post_processing/bbox_nms.py (2)
14. mmdet/core/utils/misc.py (8)
15. mmdet/datasets/cityscapes.py (16)
16. mmdet/datasets/coco.py (10)
17. mmdet/datasets/custom.py (6)
18. mmdet/datasets/pipelines/formating.py (18)
19. mmdet/datasets/pipelines/transforms.py (79)
20. mmdet/models/backbones/regnet.py (4)
21. mmdet/models/dense_heads/anchor_head.py (41)
22. mmdet/models/dense_heads/fcos_head.py (33)
23. mmdet/models/detectors/atss.py (1)
24. mmdet/models/detectors/base.py (15)
25. mmdet/models/losses/ghm_loss.py (8)
26. mmdet/models/losses/iou_loss.py (4)
27. mmdet/models/necks/bfp.py (3)
28. mmdet/models/necks/fpn.py (6)
29. mmdet/models/necks/hrfpn.py (3)
30. setup.cfg (5)

@@ -1,3 +1,3 @@
yapf -r -i --style .style.yapf mmdet/ configs/ tests/ tools/
yapf -r -i mmdet/ configs/ tests/ tools/
isort -rc mmdet/ configs/ tests/ tools/
flake8 .

@@ -27,7 +27,7 @@ We use the following tools for linting and formatting:
- [yapf](https://github.com/google/yapf): formatter
- [isort](https://github.com/timothycrosley/isort): sort imports
Style configurations of yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).
Style configurations of yapf and isort can be found in [setup.cfg](../setup.cfg).
We use [pre-commit hooks](https://pre-commit.com/) that check and format `flake8`, `yapf`, `isort`, trailing whitespace,
fix `end-of-files`, and sort `requirements.txt` automatically on every commit.

@@ -1,4 +0,0 @@
[style]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true

@@ -39,10 +39,7 @@ extensions = [
]
autodoc_mock_imports = [
'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version',
'mmdet.ops.corner_pool', 'mmdet.ops.dcn', 'mmdet.ops.masked_conv',
'mmdet.ops.nms', 'mmdet.ops.roi_align', 'mmdet.ops.roi_pool',
'mmdet.ops.sigmoid_focal_loss', 'mmdet.ops.carafe', 'mmdet.ops.utils'
'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version', 'mmcv.ops'
]
# Add any paths that contain templates here, relative to this directory.

@@ -8,13 +8,19 @@ Welcome to MMDetection's documentation!
getting_started.md
config.md
model_zoo.md
tutorials/finetune.md
tutorials/new_dataset.md
tutorials/data_pipeline.md
tutorials/new_modules.md
compatibility.md
changelog.md
projects.md
.. toctree::
   :maxdepth: 2
   :caption: Tutorials

   tutorials/index.rst

.. toctree::
   :caption: API Reference

   api.rst

@@ -0,0 +1,7 @@
.. toctree::
   :maxdepth: 2

   finetune.md
   new_dataset.md
   data_pipeline.md
   new_modules.md

@@ -123,7 +123,7 @@ class AnchorGenerator(object):
"""Generate base anchors.
Returns:
list(torch.Tensor): Base anchors of a feature grid in multiple
list(torch.Tensor): Base anchors of a feature grid in multiple \
feature levels.
"""
multi_level_base_anchors = []
@@ -155,7 +155,7 @@ class AnchorGenerator(object):
related to a single feature grid. Defaults to None.
Returns:
torch.Tensor: Anchors in a single-level feature maps
torch.Tensor: Anchors in a single-level feature map.
"""
w = base_size
h = base_size
@@ -212,10 +212,10 @@ class AnchorGenerator(object):
device (str): Device where the anchors will be put on.
Return:
list[torch.Tensor]: Anchors in multiple feature levels.
The sizes of each tensor should be [N, 4], where
N = width * height * num_base_anchors, width and height
are the sizes of the corresponding feature lavel,
list[torch.Tensor]: Anchors in multiple feature levels. \
The sizes of each tensor should be [N, 4], where \
N = width * height * num_base_anchors, width and height \
are the sizes of the corresponding feature level, \
num_base_anchors is the number of anchors for that level.
"""
assert self.num_levels == len(featmap_sizes)
@@ -308,7 +308,7 @@ class AnchorGenerator(object):
Defaults to 'cuda'.
Returns:
torch.Tensor: The valid flags of each anchor in a single level
torch.Tensor: The valid flags of each anchor in a single level \
feature map.
"""
feat_h, feat_w = featmap_size
@@ -433,7 +433,7 @@ class SSDAnchorGenerator(AnchorGenerator):
"""Generate base anchors.
Returns:
list(torch.Tensor): Base anchors of a feature grid in multiple
list(torch.Tensor): Base anchors of a feature grid in multiple \
feature levels.
"""
multi_level_base_anchors = []
@@ -471,12 +471,13 @@ class SSDAnchorGenerator(AnchorGenerator):
class LegacyAnchorGenerator(AnchorGenerator):
"""Legacy anchor generator used in MMDetection V1.x.
Difference to the V2.0 anchor generator:
Note:
Difference to the V2.0 anchor generator:
1. The center offset of V1.x anchors are set to be 0.5 rather than 0.
2. The width/height are minused by 1 when calculating the anchors' centers
and corners to meet the V1.x coordinate system.
3. The anchors' corners are quantized.
1. The center offset of V1.x anchors are set to be 0.5 rather than 0.
2. The width/height are reduced by 1 when calculating the anchors' \
centers and corners to meet the V1.x coordinate system.
3. The anchors' corners are quantized.
Args:
strides (list[int] | list[tuple[int]]): Strides of anchors
@@ -523,7 +524,7 @@ class LegacyAnchorGenerator(AnchorGenerator):
"""Generate base anchors of a single level.
Note:
The width/height of anchors are minused by 1 when calculating
The width/height of anchors are reduced by 1 when calculating \
the centers and corners to meet the V1.x coordinate system.
Args:
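
For orientation, a usage sketch of the generator these docstrings describe, in the doctest style the file itself uses (a single level with stride 16, one ratio, one scale and base size 9 is assumed):

>>> from mmdet.core import AnchorGenerator
>>> self = AnchorGenerator([16], [1.], [1.], [9])
>>> all_anchors = self.grid_anchors([(2, 2)], device='cpu')
>>> # one (N, 4) tensor per level, N = feat_h * feat_w * num_base_anchors
>>> all_anchors[0].shape
torch.Size([4, 4])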

@@ -31,7 +31,7 @@ def anchor_inside_flags(flat_anchors,
Defaults to 0.
Returns:
torch.Tensor: Flags indicating whether the anchors are inside a
torch.Tensor: Flags indicating whether the anchors are inside a \
valid range.
"""
img_h, img_w = img_shape[:2]

@@ -108,7 +108,7 @@ class CenterRegionAssigner(BaseAssigner):
gt_bboxes (Tensor): Ground truth boxes, shape (k, 4).
Returns:
Tensor: The priority of gts so that gts with larger priority is
Tensor: The priority of gts so that gts with larger priority are \
more likely to be assigned. Shape (k, )
"""
gt_areas = bboxes_area(gt_bboxes)
@@ -119,9 +119,10 @@ class CenterRegionAssigner(BaseAssigner):
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
This method assigns gts to every bbox (proposal/anchor), each bbox will
be assigned with -1, or a semi-positive number. -1 means negative
sample, semi-positive number is the index (0-based) of assigned gt.
This method assigns gts to every bbox (proposal/anchor), each bbox \
will be assigned with -1, or a semi-positive number. -1 means \
negative sample, semi-positive number is the index (0-based) of \
assigned gt.
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
@@ -131,12 +132,13 @@ class CenterRegionAssigner(BaseAssigner):
gt_labels (tensor, optional): Label of gt_bboxes, shape (num_gts,).
Returns:
:obj:`AssignResult`: The assigned result. Note that shadowed_labels
of shape (N, 2) is also added as an `assign_result` attribute.
`shadowed_labels` is a tensor composed of N pairs of
[anchor_ind, class_label], where N is the number of anchors that
lie in the outer region of a gt, anchor_ind is the shadowed
anchor index and class_label is the shadowed class label.
:obj:`AssignResult`: The assigned result. Note that \
shadowed_labels of shape (N, 2) is also added as an \
`assign_result` attribute. `shadowed_labels` is a tensor \
composed of N pairs of [anchor_ind, class_label], where N \
is the number of anchors that lie in the outer region of a \
gt, anchor_ind is the shadowed anchor index and class_label \
is the shadowed class label.
Example:
>>> self = CenterRegionAssigner(0.2, 0.2)
@@ -261,11 +263,14 @@ class CenterRegionAssigner(BaseAssigner):
match with multiple gts. Shape: (num_gt, ).
Returns:
assigned_gt_inds: The assigned gt index of each prior bbox
(i.e. index from 1 to num_gts). Shape: (num_prior, ).
shadowed_gt_inds: shadowed gt indices. It is a tensor of shape
(num_ignore, 2) with first column being the shadowed prior bbox
indices and the second column the shadowed gt indices (1-based)
tuple: Returns (assigned_gt_inds, shadowed_gt_inds).
- assigned_gt_inds: The assigned gt index of each prior bbox \
(i.e. index from 1 to num_gts). Shape: (num_prior, ).
- shadowed_gt_inds: shadowed gt indices. It is a tensor of \
shape (num_ignore, 2) with first column being the \
shadowed prior bbox indices and the second column the \
shadowed gt indices (1-based).
"""
num_bboxes, num_gts = is_bbox_in_gt_core.shape
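
The Example block shown truncated above continues in the source along these lines (a sketch, not a verbatim quote; note that gt_inds in the returned AssignResult is 1-based, with 0 meaning negative):

>>> import torch
>>> self = CenterRegionAssigner(0.2, 0.2)
>>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
>>> gt_bboxes = torch.Tensor([[0, 0, 10, 10]])
>>> assign_result = self.assign(bboxes, gt_bboxes)
>>> assert torch.all(assign_result.gt_inds == torch.LongTensor([1, 0]))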

@@ -97,9 +97,9 @@ class SamplingResult(util_mixins.NiceRepr):
kwargs (keyword arguments):
- num_preds: number of predicted boxes
- num_gts: number of true boxes
- p_ignore (float): probability of a predicted box assinged to
- p_ignore (float): probability of a predicted box assigned to \
an ignored truth.
- p_assigned (float): probability of a predicted box not being
- p_assigned (float): probability of a predicted box not being \
assigned.
- p_use_label (float | bool): with labels or not.

@@ -507,9 +507,9 @@ class PolygonMasks(BaseInstanceMasks):
def areas(self):
"""Compute areas of masks.
This func is modified from
https://github.com/facebookresearch/detectron2/blob/ffff8acc35ea88ad1cb1806ab0f00b4c1c5dbfd9/detectron2/structures/masks.py#L387
Only works with Polygons, using the shoelace formula
This func is modified from `detectron2
<https://github.com/facebookresearch/detectron2/blob/ffff8acc35ea88ad1cb1806ab0f00b4c1c5dbfd9/detectron2/structures/masks.py#L387>`_.
The function only works with Polygons using the shoelace formula.
Return:
ndarray: areas of each instance

@@ -17,8 +17,8 @@ def split_combined_polys(polys, poly_lens, polys_per_mask):
of each mask
Returns:
list: a list (length = image num) of list (length = mask num) of
list (length = poly num) of numpy array
list: a list (length = image num) of list (length = mask num) of \
list (length = poly num) of numpy array.
"""
mask_polys_list = []
for img_id in range(len(polys)):

@@ -23,7 +23,7 @@ def multiclass_nms(multi_bboxes,
applying NMS
Returns:
tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels \
are 0-based.
"""
num_classes = multi_scores.size(1) - 1

@@ -37,16 +37,16 @@ def multi_apply(func, *args, **kwargs):
Note:
This function applies the ``func`` to multiple inputs and
map the multiple outputs of the ``func`` into different
list. Each list contains the same type of outputs corresponding
to different inputs.
map the multiple outputs of the ``func`` into different
list. Each list contains the same type of outputs corresponding
to different inputs.
Args:
func (Function): A function that will be applied to a list of
arguments
Returns:
tuple(list): A tuple containing multiple list, each list contains
tuple(list): A tuple containing multiple list, each list contains \
a kind of returned results by the function
"""
pfunc = partial(func, **kwargs) if kwargs else func
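
Since the note above is abstract, here is a minimal sketch of the behaviour (mmdet's implementation is essentially functools.partial + map + a zip-transpose): a two-output function applied over three inputs yields two lists of three results.

>>> from mmdet.core.utils import multi_apply  # import path assumed for v2.x
>>> def square_and_cube(x):
...     return x**2, x**3
>>> squares, cubes = multi_apply(square_and_cube, [1, 2, 3])
>>> squares
[1, 4, 9]
>>> cubes
[1, 8, 27]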

@@ -45,8 +45,8 @@ class CityscapesDataset(CocoDataset):
ann_info (list[dict]): Annotation info of an image.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, seg_map.
dict: A dict containing the following keys: bboxes, \
bboxes_ignore, labels, masks, seg_map. \
"masks" are already decoded into binary masks.
"""
gt_bboxes = []
@@ -102,8 +102,8 @@ class CityscapesDataset(CocoDataset):
the txt files will be named "somepath/xxx.txt".
Returns:
list[str: str]: result txt files which contains corresponding
instance segmentation images.
list[str]: Result txt files which contain corresponding \
instance segmentation images.
"""
try:
import cityscapesscripts.helpers.labels as CSLabels
@@ -168,8 +168,8 @@ class CityscapesDataset(CocoDataset):
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
tuple: (result_files, tmp_dir), result_files is a dict containing \
the json filepaths, tmp_dir is the temporary directory created \
for saving txt/png files when txtfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'
@@ -229,7 +229,7 @@ class CityscapesDataset(CocoDataset):
also be computed. Default: 0.5.
Returns:
dict[str, float]: COCO style evaluation metric or cityscapes mAP
dict[str, float]: COCO style evaluation metric or cityscapes mAP \
and AP@50.
"""
eval_results = dict()
@@ -268,7 +268,7 @@ class CityscapesDataset(CocoDataset):
related information during evaluation. Default: None.
Returns:
dict[str: float]: Cityscapes evaluation results, contains 'mAP'
dict[str: float]: Cityscapes evaluation results, contains 'mAP' \
and 'AP@50'.
"""

@@ -128,8 +128,8 @@ class CocoDataset(CustomDataset):
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, seg_map. "masks" are raw annotations and not
dict: A dict containing the following keys: bboxes, bboxes_ignore, \
labels, masks, seg_map. "masks" are raw annotations and not \
decoded into binary masks.
"""
gt_bboxes = []
@@ -285,7 +285,7 @@ class CocoDataset(CustomDataset):
"somepath/xxx.proposal.json".
Returns:
dict[str: str]: Possible keys are "bbox", "segm", "proposal", and
dict[str: str]: Possible keys are "bbox", "segm", "proposal", and \
values are corresponding filenames.
"""
result_files = dict()
@@ -344,8 +344,8 @@ class CocoDataset(CustomDataset):
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
tuple: (result_files, tmp_dir), result_files is a dict containing \
the json filepaths, tmp_dir is the temporary directory created \
for saving json files when jsonfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'

@@ -177,7 +177,7 @@ class CustomDataset(Dataset):
idx (int): Index of data.
Returns:
dict: Training/test data (with annotation if `test_mode` is set
dict: Training/test data (with annotation if `test_mode` is set \
True).
"""
@@ -197,7 +197,7 @@ class CustomDataset(Dataset):
idx (int): Index of data.
Returns:
dict: Training data and annotation after pipeline with new keys
dict: Training data and annotation after pipeline with new keys \
introduced by pipeline.
"""
@@ -216,7 +216,7 @@ class CustomDataset(Dataset):
idx (int): Index of data.
Returns:
dict: Testing data after pipeline with new keys intorduced by
dict: Testing data after pipeline with new keys introduced by \
pipeline.
"""

@@ -119,7 +119,7 @@ class Transpose(object):
results (dict): Result dict contains the data to transpose.
Returns:
dict: The result dict contains the data transposed to
dict: The result dict contains the data transposed to \
``self.order``.
"""
for key in self.keys:
@@ -156,7 +156,7 @@ class ToDataContainer(object):
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data converted to
dict: The result dict contains the data converted to \
:obj:`mmcv.DataContainer`.
"""
@@ -184,7 +184,7 @@ class DefaultFormatBundle(object):
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, \
(3)to DataContainer (stack=True)
"""
@@ -195,7 +195,7 @@ class DefaultFormatBundle(object):
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data that is formatted with
dict: The result dict contains the data that is formatted with \
default bundle.
"""
@@ -258,9 +258,9 @@ class Collect(object):
The "img_meta" item is always populated. The contents of the "img_meta"
dictionary depend on "meta_keys". By default this includes:
- "img_shape": shape of the image input to the network as a tuple
(h, w, c). Note that images may be zero padded on the bottom/right
if the batch tensor is larger than this shape.
- "img_shape": shape of the image input to the network as a tuple \
(h, w, c). Note that images may be zero padded on the \
bottom/right if the batch tensor is larger than this shape.
- "scale_factor": a float indicating the preprocessing scale
@@ -273,6 +273,7 @@ class Collect(object):
- "pad_shape": image shape after padding
- "img_norm_cfg": a dict of normalization information:
- mean - per channel mean subtraction
- std - per channel std divisor
- to_rgb - bool indicating if bgr was converted to rgb
@@ -303,6 +304,7 @@ class Collect(object):
Returns:
dict: The result dict contains the following keys
- keys in``self.keys``
- ``img_metas``
"""
@@ -349,7 +351,7 @@ class WrapFieldsToLists(object):
results (dict): Result dict contains the data to wrap.
Returns:
dict: The result dict where value of ``self.keys`` are wrapped
dict: The result dict where value of ``self.keys`` are wrapped \
into list.
"""

@@ -36,11 +36,11 @@ class Resize(object):
`img_scale` can either be a tuple (single-scale) or a list of tuples
(multi-scale). There are 3 multiscale modes:
- ``ratio_range is not None``: randomly sample a ratio from the ratio range
and multiply it with the image scale.
- ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly
- ``ratio_range is not None``: randomly sample a ratio from the ratio \
range and multiply it with the image scale.
- ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
sample a scale from the multiscale range.
- ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly
- ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
sample a scale from multiple scales.
Args:
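
Illustrative configs for the three multiscale modes above (the scale values are typical COCO settings, chosen only for the example):

>>> # ratio_range: scale (1333, 800) by a ratio sampled from [0.8, 1.2]
>>> dict(type='Resize', img_scale=(1333, 800), ratio_range=(0.8, 1.2),
...      keep_ratio=True)
>>> # multiscale_mode='range': sample a scale between the two endpoints
>>> dict(type='Resize', img_scale=[(1333, 640), (1333, 800)],
...      multiscale_mode='range', keep_ratio=True)
>>> # multiscale_mode='value': pick one of the listed scales at random
>>> dict(type='Resize', img_scale=[(1333, 672), (1333, 736), (1333, 800)],
...      multiscale_mode='value', keep_ratio=True)
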
@@ -84,8 +84,8 @@
img_scales (list[tuple]): Images scales for selection.
Returns:
(tuple, int): Returns a tuple ``(img_scale, scale_dix)``,
where ``img_scale`` is the selected image scale and
(tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
where ``img_scale`` is the selected image scale and \
``scale_idx`` is the selected index in the given candidates.
"""
@@ -104,8 +104,8 @@
and upper bound of image scales.
Returns:
(tuple, None): Returns a tuple ``(img_scale, None)``, where
``img_scale`` is sampled scale and None is just a placeholder
(tuple, None): Returns a tuple ``(img_scale, None)``, where \
``img_scale`` is sampled scale and None is just a placeholder \
to be consistent with :func:`random_select`.
"""
@@ -135,9 +135,9 @@
the ``img_scale``.
Returns:
(tuple, None): Returns a tuple ``(scale, None)``, where
``scale`` is sampled ratio multiplied with ``img_scale`` and
None is just a placeholder to be consistent with
(tuple, None): Returns a tuple ``(scale, None)``, where \
``scale`` is sampled ratio multiplied with ``img_scale`` and \
None is just a placeholder to be consistent with \
:func:`random_select`.
"""
@@ -162,7 +162,7 @@
results (dict): Result dict from :obj:`dataset`.
Returns:
dict: Two new keys 'scale` and 'scale_idx` are added into
dict: Two new keys ``scale`` and ``scale_idx`` are added into \
``results``, which would be used by subsequent pipelines.
"""
@@ -244,7 +244,7 @@
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \
'keep_ratio' keys are added into result dict.
"""
@@ -332,8 +332,8 @@
results (dict): Result dict from loading pipeline.
Returns:
dict: Flipped results, 'flip', 'flip_direction' keys are added into
result dict.
dict: Flipped results, 'flip', 'flip_direction' keys are added \
into result dict.
"""
if 'flip' not in results:
@@ -486,7 +486,7 @@
allow_negative_crop (bool): Whether to allow a crop that does not
contain any bbox area. Default to False.
Notes:
Note:
- If the image is smaller than the crop size, return the original image
- The keys for bboxes, labels and masks must be aligned. That is,
`gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and
@@ -810,9 +810,9 @@
min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w,
where a >= min_crop_size).
Notes:
The keys for bboxes, labels and masks should be paired. That is,
`gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and
Note:
The keys for bboxes, labels and masks should be paired. That is, \
`gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and \
`gt_bboxes_ignore` to `gt_labels_ignore` and `gt_masks_ignore`.
"""
@@ -838,7 +838,7 @@
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images and bounding boxes cropped,
dict: Result dict with images and bounding boxes cropped, \
'img_shape' key is updated.
"""
@@ -1174,19 +1174,19 @@
"""Random center crop and random around padding for CornerNet.
This operation generates randomly cropped image from the original image and
pads it simultaneously. Different from `RandomCrop`, the output shape may
not equal to `crop_size` strictly. We choose a random value from `ratios`
and the output shape could be larger or smaller than `crop_size`. Also the
pad in this operation is different from `Pad`, actually we use around
padding instead of right-bottom padding.
pads it simultaneously. Different from :class:`RandomCrop`, the output
shape may not equal to ``crop_size`` strictly. We choose a random value
from ``ratios`` and the output shape could be larger or smaller than
``crop_size``. The padding operation is also different from :class:`Pad`,
here we use around padding instead of right-bottom padding.
The relation between output image (padding image) and original image:
.. code-block: text
.. code-block::
output image
+----------------------------+
| padded area |
output image
+----------------------------+
| padded area |
+------|----------------------------|----------+
| | cropped area | |
| | +---------------+ | |
@@ -1194,24 +1194,26 @@
| | | range | | |
| | +---------------+ | |
+------|----------------------------|----------+
| padded area |
+----------------------------+
| padded area |
+----------------------------+
There are 5 main areas in the figure:
- output image: output image of this operation, also called padding
- output image: output image of this operation, also called padding \
image in following instruction.
- original image: input image of this operation.
- padded area: non-intersect area of output image and original image.
- cropped area: the overlap of output image and original image.
- center range: a smaller area where random center chosen from.
center range is computed by `border` and original image's shape
- center range: a smaller area where random center chosen from. \
center range is computed by `border` and original image's shape \
to avoid the random center being too close to the original image's border.
Also this operation acts differently in train and test mode; the summary
pipeline is listed below.
Train pipeline:
1. Choose a `random_ratio` from `ratios`, the shape of padding image
1. Choose a `random_ratio` from `ratios`, the shape of padding image \
will be `random_ratio * crop_size`.
2. Choose a `random_center` in `center range`.
3. Generate padding image with center matches the `random_center`.
@@ -1220,8 +1222,9 @@
6. Refine annotations.
Test pipeline:
1. Compute output shape according to `test_pad_mode`.
2. Generate padding image with center matches the original image
2. Generate padding image with center matches the original image \
center.
3. Initialize the padding image with pixel value equals to `mean`.
4. Copy the `cropped area` to padding image.
@@ -1247,7 +1250,7 @@
127 as padding shape value.
- 'logical_or': final_shape = input_shape | padding_shape_value
- 'size_divisor': final_shape = int(
- 'size_divisor': final_shape = int( \
ceil(input_shape / padding_shape_value) * padding_shape_value)
"""
@@ -1316,6 +1319,7 @@
Args:
patch (list[int]): The cropped area, [left, top, right, bottom].
boxes (numpy array, (N x 4)): Ground truth boxes.
Returns:
mask (numpy array, (N,)): Each box is inside or outside the patch.
"""
@@ -1339,6 +1343,7 @@
image (np array, H x W x C): Original image.
center (list[int]): Target crop center coord.
size (list[int]): Target crop size. [target_h, target_w]
Returns:
cropped_img (np array, target_h x target_w x C): Cropped image.
border (np array, 4): The distance of four border of `cropped_img`

@@ -234,8 +234,8 @@ class RegNet(ResNet):
divisor (int, optional): The divisor of channels. Defaults to 8.
Returns:
list, int: return a list of widths of each stage and the number of
stages
list, int: return a list of widths of each stage and the number \
of stages
"""
assert width_slope >= 0
assert initial_width > 0

@@ -119,9 +119,9 @@ class AnchorHead(BaseDenseHead):
Returns:
tuple:
cls_score (Tensor): Cls scores for a single scale level
cls_score (Tensor): Cls scores for a single scale level \
the channels number is num_anchors * num_classes.
bbox_pred (Tensor): Box energies / deltas for a single scale
bbox_pred (Tensor): Box energies / deltas for a single scale \
level, the channels number is num_anchors * 4.
"""
cls_score = self.conv_cls(x)
@@ -136,13 +136,14 @@
a 4D-tensor.
Returns:
tuple: Usually a tuple of classification scores and bbox prediction
cls_scores (list[Tensor]): Classification scores for all scale
levels, each is a 4D-tensor, the channels number is
num_anchors * num_classes.
bbox_preds (list[Tensor]): Box energies / deltas for all scale
levels, each is a 4D-tensor, the channels number is
num_anchors * 4.
tuple: A tuple of classification scores and bbox prediction.
- cls_scores (list[Tensor]): Classification scores for all \
scale levels, each is a 4D-tensor, the channels number \
is num_anchors * num_classes.
- bbox_preds (list[Tensor]): Box energies / deltas for all \
scale levels, each is a 4D-tensor, the channels number \
is num_anchors * 4.
"""
return multi_apply(self.forward_single, feats)
@@ -156,8 +157,8 @@
Returns:
tuple:
anchor_list (list[Tensor]): Anchors of each image
valid_flag_list (list[Tensor]): Valid flags of each image
anchor_list (list[Tensor]): Anchors of each image.
valid_flag_list (list[Tensor]): Valid flags of each image.
"""
num_imgs = len(img_metas)
@@ -308,13 +309,17 @@
set of anchors.
Returns:
tuple:
labels_list (list[Tensor]): Labels of each level
label_weights_list (list[Tensor]): Label weights of each level
bbox_targets_list (list[Tensor]): BBox targets of each level
bbox_weights_list (list[Tensor]): BBox weights of each level
num_total_pos (int): Number of positive samples in all images
num_total_neg (int): Number of negative samples in all images
tuple: Usually returns a tuple containing learning targets.
- labels_list (list[Tensor]): Labels of each level.
- label_weights_list (list[Tensor]): Label weights of each \
level.
- bbox_targets_list (list[Tensor]): BBox targets of each level.
- bbox_weights_list (list[Tensor]): BBox weights of each level.
- num_total_pos (int): Number of positive samples in all \
images.
- num_total_neg (int): Number of negative samples in all \
images.
additional_returns: This function enables user-defined returns from
`self._get_targets_single`. These returns are currently refined
to properties at each feature map (i.e. having HxW dimension).
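
A shape-level sketch of the forward contract documented above (constructor arguments beyond num_classes/in_channels are left at the class defaults; level sizes here are arbitrary):

>>> import torch
>>> from mmdet.models.dense_heads import AnchorHead  # import path assumed
>>> self = AnchorHead(num_classes=80, in_channels=256)
>>> feats = [torch.rand(1, 256, s, s) for s in [32, 16]]
>>> cls_scores, bbox_preds = self.forward(feats)
>>> # per level: cls_score is (1, num_anchors * num_classes, H, W),
>>> # bbox_pred is (1, num_anchors * 4, H, W)
>>> assert len(cls_scores) == len(bbox_preds) == len(feats)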

@@ -45,6 +45,7 @@ class FCOSHead(AnchorFreeHead):
loss_centerness (dict): Config of centerness loss.
norm_cfg (dict): dictionary to construct and config norm layer.
Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
Example:
>>> self = FCOSHead(11, 7)
>>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]
@@ -108,13 +109,13 @@
Returns:
tuple:
cls_scores (list[Tensor]): Box scores for each scale level,
each is a 4D-tensor, the channel number is
cls_scores (list[Tensor]): Box scores for each scale level, \
each is a 4D-tensor, the channel number is \
num_points * num_classes.
bbox_preds (list[Tensor]): Box energies / deltas for each scale
level, each is a 4D-tensor, the channel number is
bbox_preds (list[Tensor]): Box energies / deltas for each \
scale level, each is a 4D-tensor, the channel number is \
num_points * 4.
centernesses (list[Tensor]): Centerss for each scale level,
centernesses (list[Tensor]): Centerness for each scale level, \
each is a 4D-tensor, the channel number is num_points * 1.
"""
return multi_apply(self.forward_single, feats, self.scales,
@@ -132,7 +133,7 @@
is True.
Returns:
tuple: scores for each class, bbox predictions and centerness
tuple: scores for each class, bbox predictions and centerness \
predictions of input feature maps.
"""
cls_score, bbox_pred, cls_feat, reg_feat = super().forward_single(x)
@@ -272,12 +273,12 @@
rescale (bool): If True, return boxes in original image space
Returns:
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. \
The first item is an (n, 5) tensor, where the first 4 columns \
are bounding box positions (tl_x, tl_y, br_x, br_y) and the \
5-th column is a score between 0 and 1. The second item is a \
(n,) tensor where each item is the predicted class label of \
the corresponding box.
"""
assert len(cls_scores) == len(bbox_preds)
num_levels = len(cls_scores)
@@ -335,8 +336,8 @@
rescale (bool): If True, return boxes in original image space.
Returns:
Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
Tensor: Labeled boxes in shape (n, 5), where the first 4 columns \
are bounding box positions (tl_x, tl_y, br_x, br_y) and the \
5-th column is a score between 0 and 1.
"""
cfg = self.test_cfg if cfg is None else cfg
@@ -408,8 +409,8 @@
Returns:
tuple:
concat_lvl_labels (list[Tensor]): Labels of each level.
concat_lvl_bbox_targets (list[Tensor]): BBox targets of each
concat_lvl_labels (list[Tensor]): Labels of each level. \
concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \
level.
"""
assert len(points) == len(self.regress_ranges)
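
For completeness, the Example added near the top of this file (truncated in its hunk above) continues in the source roughly as follows, checking that forward returns one entry per FPN level:

>>> cls_score, bbox_pred, centerness = self.forward(feats)
>>> assert len(cls_score) == len(self.scales)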

@@ -4,6 +4,7 @@ from .single_stage import SingleStageDetector
@DETECTORS.register_module()
class ATSS(SingleStageDetector):
"""Implementation of `ATSS <https://arxiv.org/abs/1912.02424>`_."""
def __init__(self,
backbone,

@@ -180,8 +180,8 @@ class BaseDetector(nn.Module, metaclass=ABCMeta):
losses and other necessary information.
Returns:
tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor
which may be a weighted sum of all losses, log_vars contains
tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor \
which may be a weighted sum of all losses, log_vars contains \
all the variables to be sent to the logger.
"""
log_vars = OrderedDict()
@@ -223,14 +223,15 @@ class BaseDetector(nn.Module, metaclass=ABCMeta):
and reserved.
Returns:
dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
dict: It should contain at least 3 keys: ``loss``, ``log_vars``, \
``num_samples``.
``loss`` is a tensor for back propagation, which can be a
- ``loss`` is a tensor for back propagation, which can be a \
weighted sum of multiple losses.
``log_vars`` contains all the variables to be sent to the
- ``log_vars`` contains all the variables to be sent to the \
logger.
``num_samples`` indicates the batch size (when the model is
DDP, it means the batch size on each GPU), which is used for
- ``num_samples`` indicates the batch size (when the model is \
DDP, it means the batch size on each GPU), which is used for \
averaging the logs.
"""
losses = self(**data)
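
A hedged sketch of the (loss, log_vars) contract described above; this mirrors the observable behaviour of _parse_losses rather than quoting it verbatim:

>>> import torch
>>> from collections import OrderedDict
>>> losses = dict(loss_cls=torch.tensor(0.3), loss_bbox=torch.tensor(0.2))
>>> log_vars = OrderedDict((k, v.mean()) for k, v in losses.items())
>>> loss = sum(v for k, v in log_vars.items() if 'loss' in k)
>>> log_vars['loss'] = loss
>>> round(float(loss), 3)
0.5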

@@ -22,8 +22,8 @@ class GHMC(nn.Module):
"""GHM Classification Loss.
Details of the theorem can be viewed in the paper
"Gradient Harmonized Single-stage Detector".
https://arxiv.org/abs/1811.05181
`Gradient Harmonized Single-stage Detector
<https://arxiv.org/abs/1811.05181>`_.
Args:
bins (int): Number of the unit regions for distribution calculation.
@@ -100,8 +100,8 @@ class GHMR(nn.Module):
"""GHM Regression Loss.
Details of the theorem can be viewed in the paper
"Gradient Harmonized Single-stage Detector"
https://arxiv.org/abs/1811.05181
`Gradient Harmonized Single-stage Detector
<https://arxiv.org/abs/1811.05181>`_.
Args:
mu (float): The parameter for the Authentic Smooth L1 loss.

@@ -29,8 +29,10 @@ def iou_loss(pred, target, eps=1e-6):
@weighted_loss
def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):
"""`Improving Object Localization with Fitness NMS and Bounded IoU Loss.
"""BIoULoss.
This is an implementation of paper
`Improving Object Localization with Fitness NMS and Bounded IoU Loss.
<https://arxiv.org/abs/1711.00164>`_.
Args:

@@ -13,7 +13,8 @@ class BFP(nn.Module):
BFP takes multi-level features as inputs and gather them into a single one,
then refine the gathered feature and scatter the refined results to
multi-level features. This module is used in Libra R-CNN (CVPR 2019), see
https://arxiv.org/pdf/1904.02701.pdf for details.
the paper `Libra R-CNN: Towards Balanced Learning for Object Detection
<https://arxiv.org/abs/1904.02701>`_ for details.
Args:
in_channels (int): Number of input channels (feature maps of all levels

@@ -8,10 +8,10 @@ from ..builder import NECKS
@NECKS.register_module()
class FPN(nn.Module):
"""Feature Pyramid Network.
r"""Feature Pyramid Network.
This is an implementation of - Feature Pyramid Networks for Object
Detection (https://arxiv.org/abs/1612.03144)
This is an implementation of paper `Feature Pyramid Networks for Object
Detection <https://arxiv.org/abs/1612.03144>`_.
Args:
in_channels (List[int]): Number of input channels per scale.
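
The refreshed FPN docstring in the source also carries a doctest along these lines (toy channel counts and scales; reproduced here for orientation, with the import added):

>>> import torch
>>> from mmdet.models.necks import FPN
>>> in_channels = [2, 3, 5, 7]
>>> scales = [340, 170, 84, 43]
>>> inputs = [torch.rand(1, c, s, s) for c, s in zip(in_channels, scales)]
>>> self = FPN(in_channels, 11, len(in_channels)).eval()
>>> outputs = self.forward(inputs)
>>> for i in range(len(outputs)):
...     print(f'outputs[{i}].shape = {outputs[i].shape}')
outputs[0].shape = torch.Size([1, 11, 340, 340])
outputs[1].shape = torch.Size([1, 11, 170, 170])
outputs[2].shape = torch.Size([1, 11, 84, 84])
outputs[3].shape = torch.Size([1, 11, 43, 43])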

@@ -11,7 +11,8 @@ from ..builder import NECKS
class HRFPN(nn.Module):
"""HRFPN (High Resolution Feature Pyrmamids)
arXiv: https://arxiv.org/abs/1904.04514
paper: `High-Resolution Representations for Labeling Pixels and Regions
<https://arxiv.org/abs/1904.04514>`_.
Args:
in_channels (list): number of channels for each branch.

@@ -6,3 +6,8 @@ known_first_party = mmdet
known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
[yapf]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true