# Source code for mmfewshot.detection.datasets.pipelines.transforms
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
from typing import Dict, List, Tuple
import mmcv
import numpy as np
from mmdet.datasets import PIPELINES
from mmdet.datasets.pipelines import (Normalize, Pad, RandomCrop, RandomFlip,
Resize)
# TODO: Simplify pipelines by decoupling operation.
@PIPELINES.register_module()
class CropResizeInstance:
    """Crop one instance from the image, pad it to a square and resize it.

    The first box of ``results['gt_bboxes']`` is cut out of the image with
    ``num_context_pixels`` of extra context on both sides of its longer side.
    The crop is pasted onto a zero-filled square canvas and the canvas is
    rescaled with ``mmcv.imrescale`` using ``target_size``. The box
    coordinates are rewritten relative to the rescaled square.

    Args:
        num_context_pixels (int): Padding pixel around instance. Default: 16.
        target_size (tuple[int, int]): Resize cropped instance to target size.
            Default: (320, 320).
    """

    def __init__(
        self,
        num_context_pixels: int = 16,
        target_size: Tuple[int, int] = (320, 320)
    ) -> None:
        assert isinstance(num_context_pixels, int)
        assert len(target_size) == 2, 'target_size'
        self.num_context_pixels = num_context_pixels
        self.target_size = target_size

    def __call__(self, results: Dict) -> Dict:
        """Crop the instance, pad it to a square and rescale it.

        Args:
            results (dict): Result dict from loading pipeline. The keys
                'img' and 'gt_bboxes' are read; only the first row of
                'gt_bboxes' is used.

        Returns:
            dict: The same dict with 'img', 'img_shape' and 'gt_bboxes'
                replaced by the cropped and resized instance results.
        """
        img = results['img']
        gt_bbox = results['gt_bboxes']
        img_h, img_w = img.shape[:2]  # h, w
        # Only the first box is used; its coordinates are truncated to int.
        x1, y1, x2, y2 = list(map(int, gt_bbox.tolist()[0]))
        bbox_w = x2 - x1
        bbox_h = y2 - y1
        # (t_x1, t_y1, t_x2, t_y2) tracks the box position inside the crop
        # and, later, inside the padded square.
        t_x1, t_y1, t_x2, t_y2 = 0, 0, bbox_w, bbox_h

        if bbox_w >= bbox_h:
            # Width is the long side: add context pixels horizontally.
            crop_x1 = x1 - self.num_context_pixels
            crop_x2 = x2 + self.num_context_pixels
            # t_x1 and t_x2 will change when crop context or overflow
            t_x1 = t_x1 + self.num_context_pixels
            t_x2 = t_x1 + bbox_w
            if crop_x1 < 0:
                # crop_x1 is negative here, so the box offset shrinks by the
                # part of the context that falls outside the image.
                t_x1 = t_x1 + crop_x1
                t_x2 = t_x1 + bbox_w
                crop_x1 = 0
            if crop_x2 > img_w:
                crop_x2 = img_w

            short_size = bbox_h
            long_size = crop_x2 - crop_x1
            # Vertical window of length ``long_size`` centered on the box.
            y_center = int((y2 + y1) / 2)  # math.ceil((y2 + y1) / 2)
            crop_y1 = int(
                y_center -
                (long_size / 2))  # int(y_center - math.ceil(long_size / 2))
            crop_y2 = int(
                y_center +
                (long_size / 2))  # int(y_center + math.floor(long_size / 2))

            # t_y1 and t_y2 will change when crop context or overflow
            t_y1 = t_y1 + math.ceil((long_size - short_size) / 2)
            t_y2 = t_y1 + bbox_h

            if crop_y1 < 0:
                t_y1 = t_y1 + crop_y1
                t_y2 = t_y1 + bbox_h
                crop_y1 = 0
            if crop_y2 > img_h:
                crop_y2 = img_h

            crop_short_size = crop_y2 - crop_y1
            crop_long_size = crop_x2 - crop_x1

            # Zero-filled 3-channel uint8 canvas whose side is the long side
            # of the crop.
            square = np.zeros((crop_long_size, crop_long_size, 3),
                              dtype=np.uint8)
            # Vertical offset that centers the crop on the canvas.
            delta = int(
                (crop_long_size - crop_short_size) /
                2)  # int(math.ceil((crop_long_size - crop_short_size) / 2))
            square_y1 = delta
            square_y2 = delta + crop_short_size

            t_y1 = t_y1 + delta
            t_y2 = t_y2 + delta

            crop_box = img[crop_y1:crop_y2, crop_x1:crop_x2, :]
            square[square_y1:square_y2, :, :] = crop_box
        else:
            # Height is the long side: mirror of the branch above with the
            # roles of x and y swapped.
            crop_y1 = y1 - self.num_context_pixels
            crop_y2 = y2 + self.num_context_pixels

            # t_y1 and t_y2 will change when crop context or overflow
            t_y1 = t_y1 + self.num_context_pixels
            t_y2 = t_y1 + bbox_h
            if crop_y1 < 0:
                t_y1 = t_y1 + crop_y1
                t_y2 = t_y1 + bbox_h
                crop_y1 = 0
            if crop_y2 > img_h:
                crop_y2 = img_h

            short_size = bbox_w
            long_size = crop_y2 - crop_y1
            # Horizontal window of length ``long_size`` centered on the box.
            x_center = int((x2 + x1) / 2)  # math.ceil((x2 + x1) / 2)
            crop_x1 = int(
                x_center -
                (long_size / 2))  # int(x_center - math.ceil(long_size / 2))
            crop_x2 = int(
                x_center +
                (long_size / 2))  # int(x_center + math.floor(long_size / 2))

            # t_x1 and t_x2 will change when crop context or overflow
            t_x1 = t_x1 + math.ceil((long_size - short_size) / 2)
            t_x2 = t_x1 + bbox_w
            if crop_x1 < 0:
                t_x1 = t_x1 + crop_x1
                t_x2 = t_x1 + bbox_w
                crop_x1 = 0
            if crop_x2 > img_w:
                crop_x2 = img_w

            crop_short_size = crop_x2 - crop_x1
            crop_long_size = crop_y2 - crop_y1
            square = np.zeros((crop_long_size, crop_long_size, 3),
                              dtype=np.uint8)
            # Horizontal offset that centers the crop on the canvas.
            delta = int(
                (crop_long_size - crop_short_size) /
                2)  # int(math.ceil((crop_long_size - crop_short_size) / 2))
            square_x1 = delta
            square_x2 = delta + crop_short_size

            t_x1 = t_x1 + delta
            t_x2 = t_x2 + delta
            crop_box = img[crop_y1:crop_y2, crop_x1:crop_x2, :]
            square[:, square_x1:square_x2, :] = crop_box

        # Rescale in float32, then convert back to uint8.
        square = square.astype(np.float32, copy=False)
        square, square_scale = mmcv.imrescale(
            square, self.target_size, return_scale=True, backend='cv2')
        square = square.astype(np.uint8)

        # Apply the returned scale to the box; results are truncated to int.
        t_x1 = int(t_x1 * square_scale)
        t_y1 = int(t_y1 * square_scale)
        t_x2 = int(t_x2 * square_scale)
        t_y2 = int(t_y2 * square_scale)
        results['img'] = square
        results['img_shape'] = square.shape
        results['gt_bboxes'] = np.array([[t_x1, t_y1, t_x2,
                                          t_y2]]).astype(np.float32)

        return results

    def __repr__(self) -> str:
        return self.__class__.__name__ + \
            f'(num_context_pixels={self.num_context_pixels},' \
            f' target_size={self.target_size})'
@PIPELINES.register_module()
class GenerateMask:
    """Resize support image and generate a mask.

    The image is resized to ``target_size``, the boxes are scaled by the
    resulting scale factor, and a binary mask of the first ground-truth box
    is appended to the image as an extra channel.

    Args:
        target_size (tuple[int, int]): Size handed to ``mmcv.imresize``.
            Default: (224, 224).
    """

    def __init__(self, target_size: Tuple[int] = (224, 224)) -> None:
        self.target_size = target_size

    def _resize_bboxes(self, results: Dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in results.get('bbox_fields', []):
            results[key] = results[key] * results['scale_factor']

    def _resize_img(self, results: Dict) -> None:
        """Resize images to ``self.target_size``.

        Writes 'img_shape', 'pad_shape' and 'scale_factor' into ``results``
        from the last resized image field.
        """
        for key in results.get('img_fields', ['img']):
            img, w_scale, h_scale = mmcv.imresize(
                results[key],
                self.target_size,
                return_scale=True,
                backend='cv2')
            results[key] = img

            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
            results['img_shape'] = img.shape
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor

    def _generate_mask(self, results: Dict) -> Dict:
        """Append a mask of the first gt bbox as an extra image channel.

        Args:
            results (dict): Result dict holding 'img' and 'gt_bboxes'.

        Returns:
            dict: The same dict with the mask concatenated to 'img' along
                the channel axis and 'img_shape' updated.
        """
        img = results['img']
        # Take the mask shape from the image itself. ``target_size`` follows
        # the (w, h) order of ``mmcv.imresize`` while numpy shapes are
        # (h, w), so allocating with ``target_size`` breaks for non-square
        # sizes.
        mask = np.zeros(img.shape[:2], dtype=np.float32)
        gt_bbox = results['gt_bboxes'][0]
        x1, y1 = int(gt_bbox[0]), int(gt_bbox[1])
        x2, y2 = int(gt_bbox[2]), int(gt_bbox[3])
        mask[y1:y2, x1:x2] = 1
        results['img'] = np.concatenate(
            [img, np.expand_dims(mask, axis=2)], axis=2)
        results['img_shape'] = results['img'].shape
        return results

    def __call__(self, results: Dict) -> Dict:
        """Resize the image and boxes, then append the bbox mask channel.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized images with additional dimension of bbox mask.
        """
        self._resize_img(results)
        self._resize_bboxes(results)
        self._generate_mask(results)
        return results

    def __repr__(self) -> str:
        # This class has no ``num_context_pixels`` attribute; only report
        # what __init__ actually stores.
        return self.__class__.__name__ + \
            f'(target_size={self.target_size})'
@PIPELINES.register_module()
class CropInstance:
    """Crop a single instance according to its bbox to form an image.

    Args:
        context_ratio (float): Expand the gt_bboxes of instances to
            (1 + context_ratio) times the original longest side.
            Must be non-negative. Default: 0.
    """

    def __init__(self, context_ratio: float = 0) -> None:
        # ``>= 0`` so that the documented default of 0 is accepted.
        assert context_ratio >= 0, 'context_ratio must be non-negative'
        self.context_ratio = context_ratio

    def __call__(self, results: Dict) -> Dict:
        """Crop instance according to bbox from image, the padding region
        outside the image will be set to zero.

        Args:
            results (dict): Result dict from loading pipeline. Must hold
                'img' and a 'gt_bboxes' array with exactly one row.

        Returns:
            dict: Cropped instance results; 'img', 'img_shape' and
                'gt_bboxes' are replaced.
        """
        img = results['img']
        gt_bbox = results['gt_bboxes']
        assert gt_bbox.shape[0] == 1, \
            'CropInstance pipeline do not accept multiple gt_bboxes as input.'

        h, w = img.shape[:2]
        x1, y1, x2, y2 = gt_bbox[0].tolist()

        # Square window whose side is the longest bbox side plus context.
        crop_size = int(max(x2 - x1, y2 - y1) * (1 + self.context_ratio))
        crop_img = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)

        # Top-left corner of the window in image coordinates; it may be
        # negative when the window sticks out of the image.
        old_x1 = int((x1 + x2 - crop_size) / 2)
        old_y1 = int((y1 + y2 - crop_size) / 2)
        # Bbox offset inside the window, taken before clamping so that the
        # zero padding is accounted for.
        x_shift = x1 - old_x1
        y_shift = y1 - old_y1

        # Where the valid image region starts inside the crop.
        new_x1 = max(0, -old_x1)
        new_y1 = max(0, -old_y1)
        # Clamp the source window to the image.
        old_x1 = max(0, old_x1)
        old_y1 = max(0, old_y1)
        old_x2 = min(w, int((x1 + x2 + crop_size) / 2))
        old_y2 = min(h, int((y1 + y2 + crop_size) / 2))
        new_x2 = new_x1 + old_x2 - old_x1
        new_y2 = new_y1 + old_y2 - old_y1

        crop_img[int(new_y1):int(new_y2), int(new_x1):int(new_x2)] = \
            img[int(old_y1):int(old_y2), int(old_x1):int(old_x2)]

        results['gt_bboxes'] = np.array(
            [[x_shift, y_shift, x2 - x1 + x_shift,
              y2 - y1 + y_shift]]).astype(np.float32)
        results['img'] = crop_img
        results['img_shape'] = crop_img.shape
        return results

    def __repr__(self) -> str:
        return self.__class__.__name__ + \
            f'(context_ratio={self.context_ratio})'
@PIPELINES.register_module()
class ResizeToMultiScale(Resize):
    """Resize images, bounding boxes, masks, semantic segmentation maps to
    multiple scales.

    Args:
        multi_scales (list[tuple(int)]): Multiple scales to resize. Must
            contain more than one scale.
        *args: Forwarded to the base ``Resize`` constructor.
        **kwargs: Forwarded to the base ``Resize`` constructor.
    """

    def __init__(self, multi_scales: List[Tuple[int]], *args,
                 **kwargs) -> None:
        super(ResizeToMultiScale, self).__init__(*args, **kwargs)
        assert isinstance(multi_scales, list)
        assert len(multi_scales) > 1
        self.multi_scales = multi_scales

    def __call__(self, results: Dict) -> List[Dict]:
        """Resize images, bounding boxes, masks, semantic segmentation map with
        multiple scales and return a list of results at multiple scales.

        Args:
            results (dict): Result dict from loading pipeline. It is deep
                copied once per scale and is not modified itself.

        Returns:
            list[dict]: List of resized results, one per entry of
                ``multi_scales`` and in the same order. Each copy has its
                'scale' key set and is then processed by the resize helpers
                inherited from the base class.
        """
        results_list = []
        for scale in self.multi_scales:
            # Work on an independent copy so scales do not affect each other.
            results_ = copy.deepcopy(results)
            results_['scale'] = scale
            self._resize_img(results_)
            self._resize_bboxes(results_)
            self._resize_masks(results_)
            self._resize_seg(results_)
            results_list.append(results_)
        return results_list

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        # The attribute set in __init__ is ``multi_scales``; the previous
        # ``multi_img_scales`` lookup raised AttributeError.
        repr_str += f'(multi_scales={self.multi_scales}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
@PIPELINES.register_module()
class MultiImageRandomCrop(RandomCrop):
    """Randomly crop image, bboxes and masks for data at multiple scales.

    Each result dict in the input list is paired with the crop size at the
    same position in ``multi_crop_sizes``. The crop size is capped by the
    image size before the crop helper inherited from the base class is
    called.

    Note:
        - If the image is smaller than the crop size, the crop size is
          reduced to the image size.
        - The keys for bboxes, labels and masks must be aligned. That is,
          `gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and
          `gt_bboxes_ignore` corresponds to `gt_labels_ignore` and
          `gt_masks_ignore`.

    Args:
        multi_crop_sizes (list[tuple(int)]): Crop size of each scales.
        allow_negative_crop (bool): Whether to allow a crop that does
            not contain any bbox area. Default: False.
        bbox_clip_border (bool): Whether clip the objects outside
            the border of the image. Default: True.
    """

    def __init__(self,
                 multi_crop_sizes: List[Tuple[int]],
                 allow_negative_crop: bool = False,
                 bbox_clip_border: bool = True) -> None:
        # NOTE(review): the base-class constructor is not called; every
        # attribute is set here directly.
        assert isinstance(multi_crop_sizes, list)
        assert len(multi_crop_sizes) > 1
        # Key correspondence from bboxes to labels and to masks.
        self.bbox2mask = {
            'gt_bboxes': 'gt_masks',
            'gt_bboxes_ignore': 'gt_masks_ignore'
        }
        self.bbox2label = {
            'gt_bboxes': 'gt_labels',
            'gt_bboxes_ignore': 'gt_labels_ignore'
        }
        self.bbox_clip_border = bbox_clip_border
        self.allow_negative_crop = allow_negative_crop
        self.multi_crop_sizes = multi_crop_sizes

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Crop every result dict with its matching crop size.

        Args:
            results_list (list[dict]): List of result dict from loading
                pipeline.

        Returns:
            list[dict]: The same list object that was passed in.
        """
        # zip() stops at the shorter sequence, so surplus results or crop
        # sizes are silently left untouched.
        for single_results, size_limit in zip(results_list,
                                              self.multi_crop_sizes):
            img_h, img_w = single_results['img'].shape[:2]
            # Never ask for a crop larger than the image.
            clipped_size = (min(size_limit[0], img_h),
                            min(size_limit[1], img_w))
            # NOTE(review): the return value of _crop_data is discarded.
            self._crop_data(single_results, clipped_size,
                            self.allow_negative_crop)
        return results_list

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}'
                f'(multi_crop_sizes={self.multi_crop_sizes}, '
                f'allow_negative_crop={self.allow_negative_crop}, '
                f'bbox_clip_border={self.bbox_clip_border})')
@PIPELINES.register_module()
class MultiImageRandomFlip(RandomFlip):
    """Apply the base flip transform to every result dict in a list."""

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Run the parent transform on each entry of `results_list`.

        Args:
            results_list (list[dict]): List of result dict from
                loading pipeline.

        Returns:
            list[dict]: The same list object that was passed in. The
                parent's return values are discarded.
        """
        flip_single = super().__call__
        for single_results in results_list:
            flip_single(single_results)
        return results_list
@PIPELINES.register_module()
class MultiImageNormalize(Normalize):
    """Apply the base normalize transform to every result dict in a list."""

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Run the parent transform on each entry of `results_list`.

        Args:
            results_list (list[dict]): List of result dict from
                loading pipeline.

        Returns:
            list[dict]: The same list object that was passed in. The
                parent's return values are discarded.
        """
        normalize_single = super().__call__
        for single_results in results_list:
            normalize_single(single_results)
        return results_list
@PIPELINES.register_module()
class MultiImagePad(Pad):
    """Apply the base pad transform to every result dict in a list."""

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Run the parent transform on each entry of `results_list`.

        Args:
            results_list (list[dict]): List of result dict from
                loading pipeline.

        Returns:
            list[dict]: The same list object that was passed in. The
                parent's return values are discarded.
        """
        pad_single = super().__call__
        for single_results in results_list:
            pad_single(single_results)
        return results_list