Allow XML dataset generalization (#5943)

pull/6010/head
guigarfr 4 years ago committed by GitHub
parent 7bd39044f3
commit 0fcea45bc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 23
      mmdet/datasets/xml_style.py
  2. BIN
      tests/data/custom_dataset/images/000001.jpg
  3. 44
      tests/data/custom_dataset/images/000001.xml
  4. 1
      tests/data/custom_dataset/test.txt
  5. 1
      tests/data/custom_dataset/trainval.txt
  6. 50
      tests/test_data/test_datasets/test_custom_dataset.py

@ -18,11 +18,19 @@ class XMLDataset(CustomDataset):
min_size (int | float, optional): The minimum size of bounding min_size (int | float, optional): The minimum size of bounding
boxes in the images. If the size of a bounding box is less than boxes in the images. If the size of a bounding box is less than
``min_size``, it would be add to ignored field. ``min_size``, it would be add to ignored field.
img_subdir (str): Subdir where images are stored. Default: JPEGImages.
ann_subdir (str): Subdir where annotations are. Default: Annotations.
""" """
def __init__(self, min_size=None, **kwargs): def __init__(self,
min_size=None,
img_subdir='JPEGImages',
ann_subdir='Annotations',
**kwargs):
assert self.CLASSES or kwargs.get( assert self.CLASSES or kwargs.get(
'classes', None), 'CLASSES in `XMLDataset` can not be None.' 'classes', None), 'CLASSES in `XMLDataset` can not be None.'
self.img_subdir = img_subdir
self.ann_subdir = ann_subdir
super(XMLDataset, self).__init__(**kwargs) super(XMLDataset, self).__init__(**kwargs)
self.cat2label = {cat: i for i, cat in enumerate(self.CLASSES)} self.cat2label = {cat: i for i, cat in enumerate(self.CLASSES)}
self.min_size = min_size self.min_size = min_size
@ -40,8 +48,8 @@ class XMLDataset(CustomDataset):
data_infos = [] data_infos = []
img_ids = mmcv.list_from_file(ann_file) img_ids = mmcv.list_from_file(ann_file)
for img_id in img_ids: for img_id in img_ids:
filename = f'JPEGImages/{img_id}.jpg' filename = osp.join(self.img_subdir, f'{img_id}.jpg')
xml_path = osp.join(self.img_prefix, 'Annotations', xml_path = osp.join(self.img_prefix, self.ann_subdir,
f'{img_id}.xml') f'{img_id}.xml')
tree = ET.parse(xml_path) tree = ET.parse(xml_path)
root = tree.getroot() root = tree.getroot()
@ -50,8 +58,7 @@ class XMLDataset(CustomDataset):
width = int(size.find('width').text) width = int(size.find('width').text)
height = int(size.find('height').text) height = int(size.find('height').text)
else: else:
img_path = osp.join(self.img_prefix, 'JPEGImages', img_path = osp.join(self.img_prefix, filename)
'{}.jpg'.format(img_id))
img = Image.open(img_path) img = Image.open(img_path)
width, height = img.size width, height = img.size
data_infos.append( data_infos.append(
@ -67,7 +74,7 @@ class XMLDataset(CustomDataset):
continue continue
if self.filter_empty_gt: if self.filter_empty_gt:
img_id = img_info['id'] img_id = img_info['id']
xml_path = osp.join(self.img_prefix, 'Annotations', xml_path = osp.join(self.img_prefix, self.ann_subdir,
f'{img_id}.xml') f'{img_id}.xml')
tree = ET.parse(xml_path) tree = ET.parse(xml_path)
root = tree.getroot() root = tree.getroot()
@ -91,7 +98,7 @@ class XMLDataset(CustomDataset):
""" """
img_id = self.data_infos[idx]['id'] img_id = self.data_infos[idx]['id']
xml_path = osp.join(self.img_prefix, 'Annotations', f'{img_id}.xml') xml_path = osp.join(self.img_prefix, self.ann_subdir, f'{img_id}.xml')
tree = ET.parse(xml_path) tree = ET.parse(xml_path)
root = tree.getroot() root = tree.getroot()
bboxes = [] bboxes = []
@ -158,7 +165,7 @@ class XMLDataset(CustomDataset):
cat_ids = [] cat_ids = []
img_id = self.data_infos[idx]['id'] img_id = self.data_infos[idx]['id']
xml_path = osp.join(self.img_prefix, 'Annotations', f'{img_id}.xml') xml_path = osp.join(self.img_prefix, self.ann_subdir, f'{img_id}.xml')
tree = ET.parse(xml_path) tree = ET.parse(xml_path)
root = tree.getroot() root = tree.getroot()
for obj in root.findall('object'): for obj in root.findall('object'):

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

@ -0,0 +1,44 @@
<annotation>
<folder>VOC2007</folder>
<filename>000001.jpg</filename>
<source>
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
<flickrid>341012865</flickrid>
</source>
<owner>
<flickrid>Fried Camels</flickrid>
<name>Jinky the Fruit Bat</name>
</owner>
<size>
<width>353</width>
<height>500</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>dog</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>48</xmin>
<ymin>240</ymin>
<xmax>195</xmax>
<ymax>371</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>12</ymin>
<xmax>352</xmax>
<ymax>498</ymax>
</bndbox>
</object>
</annotation>

@ -1,4 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import os
import unittest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
@ -87,3 +89,51 @@ def test_custom_classes_override_default(dataset):
assert custom_dataset.CLASSES != original_classes assert custom_dataset.CLASSES != original_classes
assert custom_dataset.CLASSES == ['bus', 'car'] assert custom_dataset.CLASSES == ['bus', 'car']
print(custom_dataset) print(custom_dataset)
class CustomDatasetTests(unittest.TestCase):
    """Check ``data_infos`` of the registered ``XMLDataset`` class.

    One test relies on the default image/annotation sub-directories, the
    other overrides both through ``img_subdir`` and ``ann_subdir``.
    """

    def setUp(self):
        """Resolve the test-data directory and look up the dataset class."""
        super().setUp()
        # Climb three directory levels from this file, then enter ``data``.
        base_dir = __file__
        for _ in range(3):
            base_dir = os.path.dirname(base_dir)
        self.data_dir = os.path.join(base_dir, 'data')
        self.dataset_class = DATASETS.get('XMLDataset')

    def _load_dataset(self, root, ann_file, **extra):
        """Build the dataset in test mode with an empty pipeline.

        Args:
            root (str): Value forwarded as ``data_root``.
            ann_file (str): Path of the file listing the image ids.
            **extra: Additional keyword arguments for the dataset class.

        Returns:
            The instantiated dataset.
        """
        return self.dataset_class(
            data_root=root,
            ann_file=ann_file,
            pipeline=[],
            classes=('person', 'dog'),
            test_mode=True,
            **extra)

    def test_data_infos__default_db_directories(self):
        """Test correct data read having a Pascal VOC directory structure."""
        voc_root = os.path.join(self.data_dir, 'VOCdevkit', 'VOC2007')
        split_file = os.path.join(voc_root, 'ImageSets', 'Main',
                                  'trainval.txt')
        dataset = self._load_dataset(voc_root, split_file)

        expected_infos = [{
            'id': '000001',
            'filename': 'JPEGImages/000001.jpg',
            'width': 353,
            'height': 500
        }]
        self.assertListEqual(expected_infos, dataset.data_infos)

    def test_data_infos__overridden_db_subdirectories(self):
        """Test correct data read having a customized directory structure."""
        custom_root = os.path.join(self.data_dir, 'custom_dataset')
        split_file = os.path.join(custom_root, 'trainval.txt')
        # Images and annotations share the same ``images`` folder here.
        dataset = self._load_dataset(
            custom_root,
            split_file,
            img_prefix='',
            img_subdir='images',
            ann_subdir='images')

        expected_infos = [{
            'id': '000001',
            'filename': 'images/000001.jpg',
            'width': 353,
            'height': 500
        }]
        self.assertListEqual(expected_infos, dataset.data_infos)

Loading…
Cancel
Save