Source code for mmfewshot.detection.datasets.pipelines.transforms

# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
from typing import Dict, List, Tuple

import mmcv
import numpy as np
from mmdet.datasets import PIPELINES
from mmdet.datasets.pipelines import (Normalize, Pad, RandomCrop, RandomFlip,
                                      Resize)


# TODO: Simplify pipelines by decoupling operation.
@PIPELINES.register_module()
class CropResizeInstance:
    """Crop and resize instance according to bbox from image.

    Args:
        num_context_pixels (int): Padding pixels around the instance.
            Default: 16.
        target_size (tuple[int, int]): Resize cropped instance to target
            size. Default: (320, 320).
    """

    def __init__(self,
                 num_context_pixels: int = 16,
                 target_size: Tuple[int] = (320, 320)) -> None:
        assert isinstance(num_context_pixels, int)
        assert len(target_size) == 2, 'target_size must be a tuple of two ints'
        self.num_context_pixels = num_context_pixels
        self.target_size = target_size

    def __call__(self, results: Dict) -> Dict:
        """Call function to crop and resize the instance.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Cropped and resized instance results.
        """
        img = results['img']
        gt_bbox = results['gt_bboxes']
        img_h, img_w = img.shape[:2]  # h, w
        x1, y1, x2, y2 = list(map(int, gt_bbox.tolist()[0]))

        bbox_w = x2 - x1
        bbox_h = y2 - y1
        t_x1, t_y1, t_x2, t_y2 = 0, 0, bbox_w, bbox_h

        if bbox_w >= bbox_h:
            crop_x1 = x1 - self.num_context_pixels
            crop_x2 = x2 + self.num_context_pixels
            # t_x1 and t_x2 will change when crop context or overflow
            t_x1 = t_x1 + self.num_context_pixels
            t_x2 = t_x1 + bbox_w
            if crop_x1 < 0:
                t_x1 = t_x1 + crop_x1
                t_x2 = t_x1 + bbox_w
                crop_x1 = 0
            if crop_x2 > img_w:
                crop_x2 = img_w

            short_size = bbox_h
            long_size = crop_x2 - crop_x1
            y_center = int((y2 + y1) / 2)  # math.ceil((y2 + y1) / 2)
            crop_y1 = int(
                y_center -
                (long_size / 2))  # int(y_center - math.ceil(long_size / 2))
            crop_y2 = int(
                y_center +
                (long_size / 2))  # int(y_center + math.floor(long_size / 2))

            # t_y1 and t_y2 will change when crop context or overflow
            t_y1 = t_y1 + math.ceil((long_size - short_size) / 2)
            t_y2 = t_y1 + bbox_h
            if crop_y1 < 0:
                t_y1 = t_y1 + crop_y1
                t_y2 = t_y1 + bbox_h
                crop_y1 = 0
            if crop_y2 > img_h:
                crop_y2 = img_h

            crop_short_size = crop_y2 - crop_y1
            crop_long_size = crop_x2 - crop_x1
            square = np.zeros((crop_long_size, crop_long_size, 3),
                              dtype=np.uint8)
            delta = int(
                (crop_long_size - crop_short_size) /
                2)  # int(math.ceil((crop_long_size - crop_short_size) / 2))
            square_y1 = delta
            square_y2 = delta + crop_short_size

            t_y1 = t_y1 + delta
            t_y2 = t_y2 + delta

            crop_box = img[crop_y1:crop_y2, crop_x1:crop_x2, :]
            square[square_y1:square_y2, :, :] = crop_box
        else:
            crop_y1 = y1 - self.num_context_pixels
            crop_y2 = y2 + self.num_context_pixels
            # t_y1 and t_y2 will change when crop context or overflow
            t_y1 = t_y1 + self.num_context_pixels
            t_y2 = t_y1 + bbox_h
            if crop_y1 < 0:
                t_y1 = t_y1 + crop_y1
                t_y2 = t_y1 + bbox_h
                crop_y1 = 0
            if crop_y2 > img_h:
                crop_y2 = img_h

            short_size = bbox_w
            long_size = crop_y2 - crop_y1
            x_center = int((x2 + x1) / 2)  # math.ceil((x2 + x1) / 2)
            crop_x1 = int(
                x_center -
                (long_size / 2))  # int(x_center - math.ceil(long_size / 2))
            crop_x2 = int(
                x_center +
                (long_size / 2))  # int(x_center + math.floor(long_size / 2))

            # t_x1 and t_x2 will change when crop context or overflow
            t_x1 = t_x1 + math.ceil((long_size - short_size) / 2)
            t_x2 = t_x1 + bbox_w
            if crop_x1 < 0:
                t_x1 = t_x1 + crop_x1
                t_x2 = t_x1 + bbox_w
                crop_x1 = 0
            if crop_x2 > img_w:
                crop_x2 = img_w

            crop_short_size = crop_x2 - crop_x1
            crop_long_size = crop_y2 - crop_y1
            square = np.zeros((crop_long_size, crop_long_size, 3),
                              dtype=np.uint8)
            delta = int(
                (crop_long_size - crop_short_size) /
                2)  # int(math.ceil((crop_long_size - crop_short_size) / 2))
            square_x1 = delta
            square_x2 = delta + crop_short_size

            t_x1 = t_x1 + delta
            t_x2 = t_x2 + delta

            crop_box = img[crop_y1:crop_y2, crop_x1:crop_x2, :]
            square[:, square_x1:square_x2, :] = crop_box

        square = square.astype(np.float32, copy=False)
        square, square_scale = mmcv.imrescale(
            square, self.target_size, return_scale=True, backend='cv2')
        square = square.astype(np.uint8)

        t_x1 = int(t_x1 * square_scale)
        t_y1 = int(t_y1 * square_scale)
        t_x2 = int(t_x2 * square_scale)
        t_y2 = int(t_y2 * square_scale)
        results['img'] = square
        results['img_shape'] = square.shape
        results['gt_bboxes'] = np.array([[t_x1, t_y1, t_x2,
                                          t_y2]]).astype(np.float32)

        return results

    def __repr__(self) -> str:
        return self.__class__.__name__ + \
               f'(num_context_pixels={self.num_context_pixels},' \
               f' target_size={self.target_size})'
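# Example (a minimal usage sketch added for illustration; the dummy image
# and bbox values are assumptions, not from the original module):
#
# >>> import numpy as np
# >>> transform = CropResizeInstance(
# ...     num_context_pixels=16, target_size=(320, 320))
# >>> results = {
# ...     'img': np.zeros((480, 640, 3), dtype=np.uint8),
# ...     'gt_bboxes': np.array([[100., 120., 300., 260.]],
# ...                           dtype=np.float32)
# ... }
# >>> out = transform(results)
# >>> out['img'].shape[:2]  # instance cropped and rescaled to target size
# (320, 320)
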
@PIPELINES.register_module()
class GenerateMask:
    """Resize support image and generate a mask.

    Args:
        target_size (tuple[int, int]): Crop and resize to target size.
            Default: (224, 224).
    """

    def __init__(self, target_size: Tuple[int] = (224, 224)) -> None:
        self.target_size = target_size

    def _resize_bboxes(self, results: Dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in results.get('bbox_fields', []):
            bboxes = results[key] * results['scale_factor']
            results[key] = bboxes

    def _resize_img(self, results: Dict) -> None:
        """Resize images to ``self.target_size``."""
        for key in results.get('img_fields', ['img']):
            img, w_scale, h_scale = mmcv.imresize(
                results[key],
                self.target_size,
                return_scale=True,
                backend='cv2')
            results[key] = img

            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
            results['img_shape'] = img.shape
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor

    def _generate_mask(self, results: Dict) -> Dict:
        mask = np.zeros(self.target_size, dtype=np.float32)
        gt_bboxes = results['gt_bboxes'][0]
        mask[int(gt_bboxes[1]):int(gt_bboxes[3]),
             int(gt_bboxes[0]):int(gt_bboxes[2])] = 1
        results['img'] = np.concatenate(
            [results['img'], np.expand_dims(mask, axis=2)], axis=2)
        results['img_shape'] = results['img'].shape
        return results

    def __call__(self, results: Dict) -> Dict:
        """Call function to resize the image and generate the bbox mask.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized image with an additional dimension of bbox mask.
        """
        self._resize_img(results)
        self._resize_bboxes(results)
        self._generate_mask(results)
        return results

    def __repr__(self) -> str:
        return self.__class__.__name__ + \
               f'(target_size={self.target_size})'
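# Example (an illustrative sketch, not part of the original module; the
# dummy shapes and fields below are assumptions):
#
# >>> import numpy as np
# >>> transform = GenerateMask(target_size=(224, 224))
# >>> results = {
# ...     'img': np.zeros((480, 640, 3), dtype=np.uint8),
# ...     'img_fields': ['img'],
# ...     'bbox_fields': ['gt_bboxes'],
# ...     'gt_bboxes': np.array([[100., 120., 300., 260.]],
# ...                           dtype=np.float32)
# ... }
# >>> out = transform(results)
# >>> out['img'].shape  # bbox mask appended as a fourth channel
# (224, 224, 4)
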
@PIPELINES.register_module()
class CropInstance:
    """Crop a single instance according to its bbox to form an image.

    Args:
        context_ratio (float): Expand the gt_bboxes of instances to
            (1 + context_ratio) times the original longest side.
            Default: 0.
    """

    def __init__(self, context_ratio: float = 0) -> None:
        assert context_ratio >= 0
        self.context_ratio = context_ratio

    def __call__(self, results: Dict) -> Dict:
        """Crop instance according to bbox from image; the padding region
        outside the image will be set to zero.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Cropped instance results.
        """
        img = results['img']
        gt_bbox = results['gt_bboxes']
        assert gt_bbox.shape[0] == 1, \
            'CropInstance pipeline does not accept multiple gt_bboxes ' \
            'as input.'
        h, w = img.shape[:2]
        x1, y1, x2, y2 = gt_bbox[0].tolist()
        crop_size = int(max(x2 - x1, y2 - y1) * (1 + self.context_ratio))
        crop_img = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
        old_x1 = int((x1 + x2 - crop_size) / 2)
        old_y1 = int((y1 + y2 - crop_size) / 2)
        x_shift = x1 - old_x1
        y_shift = y1 - old_y1
        new_x1 = 0 if old_x1 >= 0 else 0 - old_x1
        new_y1 = 0 if old_y1 >= 0 else 0 - old_y1
        old_x1 = max(0, old_x1)
        old_y1 = max(0, old_y1)
        old_x2 = min(w, int((x1 + x2 + crop_size) / 2))
        old_y2 = min(h, int((y1 + y2 + crop_size) / 2))
        new_x2 = new_x1 + old_x2 - old_x1
        new_y2 = new_y1 + old_y2 - old_y1
        crop_img[int(new_y1):int(new_y2), int(new_x1):int(new_x2)] = \
            img[int(old_y1):int(old_y2), int(old_x1):int(old_x2)]
        results['gt_bboxes'] = np.array(
            [[x_shift, y_shift, x2 - x1 + x_shift,
              y2 - y1 + y_shift]]).astype(np.float32)
        results['img'] = crop_img
        results['img_shape'] = crop_img.shape
        return results

    def __repr__(self) -> str:
        return self.__class__.__name__ + \
               f'(context_ratio={self.context_ratio})'


@PIPELINES.register_module()
class ResizeToMultiScale(Resize):
    """Resize images, bounding boxes, masks, semantic segmentation maps to
    multiple scales.

    Args:
        multi_scales (list[tuple(int)]): Multiple scales to resize.
    """

    def __init__(self, multi_scales: List[Tuple[int]], *args,
                 **kwargs) -> None:
        super(ResizeToMultiScale, self).__init__(*args, **kwargs)
        assert isinstance(multi_scales, list)
        assert len(multi_scales) > 1
        self.multi_scales = multi_scales

    def __call__(self, results: Dict) -> List[Dict]:
        """Resize images, bounding boxes, masks, semantic segmentation maps
        with multiple scales and return a list of results at multiple
        scales.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            list[dict]: List of resized results; 'img_shape', 'pad_shape',
                'scale_factor', 'keep_ratio' keys are added into each
                result dict.
        """
        results_list = []
        for scale in self.multi_scales:
            results_ = copy.deepcopy(results)
            results_['scale'] = scale
            self._resize_img(results_)
            self._resize_bboxes(results_)
            self._resize_masks(results_)
            self._resize_seg(results_)
            results_list.append(results_)
        return results_list

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(multi_scales={self.multi_scales}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
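# Example (a hedged sketch, not part of the original module; the scales
# below are assumptions): ``ResizeToMultiScale`` fans a single results
# dict out into one dict per scale, which the ``MultiImage*`` transforms
# below then consume.
#
# >>> import numpy as np
# >>> transform = ResizeToMultiScale(
# ...     multi_scales=[(608, 608), (416, 416)], keep_ratio=False)
# >>> results_list = transform(
# ...     {'img': np.zeros((480, 640, 3), dtype=np.uint8)})
# >>> [r['img'].shape[:2] for r in results_list]
# [(608, 608), (416, 416)]
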
@PIPELINES.register_module()
class MultiImageRandomCrop(RandomCrop):
    """Random crop the image & bboxes & masks for data at multiple scales.

    For each scale, the corresponding crop size in ``multi_crop_sizes`` is
    clipped to the image size, then the cropped results are generated.

    Note:
        - If the image is smaller than the absolute crop size, return the
          original image.
        - The keys for bboxes, labels and masks must be aligned. That is,
          `gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and
          `gt_bboxes_ignore` corresponds to `gt_labels_ignore` and
          `gt_masks_ignore`.
        - If the crop does not contain any gt-bbox region and
          `allow_negative_crop` is set to False, skip this image.

    Args:
        multi_crop_sizes (list[tuple(int)]): Crop size for each scale.
        allow_negative_crop (bool): Whether to allow a crop that does not
            contain any bbox area. Default: False.
        bbox_clip_border (bool): Whether to clip the objects outside the
            border of the image. Default: True.
    """

    def __init__(self,
                 multi_crop_sizes: List[Tuple[int]],
                 allow_negative_crop: bool = False,
                 bbox_clip_border: bool = True) -> None:
        assert isinstance(multi_crop_sizes, list)
        assert len(multi_crop_sizes) > 1
        self.multi_crop_sizes = multi_crop_sizes
        self.allow_negative_crop = allow_negative_crop
        self.bbox_clip_border = bbox_clip_border
        # The key correspondence from bboxes to labels and masks.
        self.bbox2label = {
            'gt_bboxes': 'gt_labels',
            'gt_bboxes_ignore': 'gt_labels_ignore'
        }
        self.bbox2mask = {
            'gt_bboxes': 'gt_masks',
            'gt_bboxes_ignore': 'gt_masks_ignore'
        }

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Randomly crop images, bounding boxes, masks, semantic
        segmentation maps of each results in `results_list`.

        Args:
            results_list (list[dict]): List of result dicts from loading
                pipeline.

        Returns:
            list[dict]: Randomly cropped `results_list`; the 'img_shape'
                key in each result dict is updated with the corresponding
                crop size.
        """
        for results, crop_size in zip(results_list, self.multi_crop_sizes):
            h, w = results['img'].shape[:2]
            crop_size = (min(crop_size[0], h), min(crop_size[1], w))
            self._crop_data(results, crop_size, self.allow_negative_crop)
        return results_list

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(multi_crop_sizes={self.multi_crop_sizes}, '
        repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str


@PIPELINES.register_module()
class MultiImageRandomFlip(RandomFlip):

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Randomly flip the image of each results in `results_list`.

        Args:
            results_list (list[dict]): List of result dicts from loading
                pipeline.

        Returns:
            list[dict]: List of flipped results; 'flip' and
                'flip_direction' keys are added into each result dict.
        """
        for results in results_list:
            super().__call__(results)
        return results_list


@PIPELINES.register_module()
class MultiImageNormalize(Normalize):

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Normalize the image of each results in `results_list`.

        Args:
            results_list (list[dict]): List of result dicts from loading
                pipeline.

        Returns:
            list[dict]: List of normalized results; the 'img_norm_cfg' key
                is added into each result dict.
        """
        for results in results_list:
            super().__call__(results)
        return results_list


@PIPELINES.register_module()
class MultiImagePad(Pad):

    def __call__(self, results_list: List[Dict]) -> List[Dict]:
        """Pad images, masks, semantic segmentation maps of each results in
        `results_list`.

        Args:
            results_list (list[dict]): List of result dicts from loading
                pipeline.

        Returns:
            list[dict]: List of padded results.
        """
        for results in results_list:
            super().__call__(results)
        return results_list
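# A configuration sketch (an assumption about typical usage, not taken
# from the original repo): these transforms are designed to chain in an
# mmdet-style pipeline, each step receiving and returning the list of
# per-scale results. The concrete scales, crop sizes and normalization
# values below are illustrative.
#
# >>> img_norm_cfg = dict(
# ...     mean=[123.675, 116.28, 103.53],
# ...     std=[58.395, 57.12, 57.375],
# ...     to_rgb=True)
# >>> multi_scale_pipeline = [
# ...     dict(type='ResizeToMultiScale',
# ...          multi_scales=[(608, 608), (416, 416)], keep_ratio=False),
# ...     dict(type='MultiImageRandomCrop',
# ...          multi_crop_sizes=[(512, 512), (320, 320)]),
# ...     dict(type='MultiImageRandomFlip', flip_ratio=0.5),
# ...     dict(type='MultiImageNormalize', **img_norm_cfg),
# ...     dict(type='MultiImagePad', size_divisor=32),
# ... ]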