Source code for detectron2.evaluation.lvis_evaluation

# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import itertools
import json
import logging
import os
import pickle
from collections import OrderedDict
import torch

import detectron2.utils.comm as comm
from detectron2.config import CfgNode
from import MetadataCatalog
from detectron2.structures import Boxes, BoxMode, pairwise_iou
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import create_small_table

from .coco_evaluation import instances_to_coco_json
from .evaluator import DatasetEvaluator

[docs]class LVISEvaluator(DatasetEvaluator): """ Evaluate object proposal and instance detection/segmentation outputs using LVIS's metrics and evaluation API. """
[docs] def __init__( self, dataset_name, tasks=None, distributed=True, output_dir=None, *, max_dets_per_image=None, ): """ Args: dataset_name (str): name of the dataset to be evaluated. It must have the following corresponding metadata: "json_file": the path to the LVIS format annotation tasks (tuple[str]): tasks that can be evaluated under the given configuration. A task is one of "bbox", "segm". By default, will infer this automatically from predictions. distributed (True): if True, will collect results from all ranks for evaluation. Otherwise, will evaluate the results in the current process. output_dir (str): optional, an output directory to dump results. max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP This limit, by default of the LVIS dataset, is 300. """ from lvis import LVIS self._logger = logging.getLogger(__name__) if tasks is not None and isinstance(tasks, CfgNode): self._logger.warn( "COCO Evaluator instantiated using config, this is deprecated behavior." " Please pass in explicit arguments instead." ) self._tasks = None # Infering it from predictions should be better else: self._tasks = tasks self._distributed = distributed self._output_dir = output_dir self._max_dets_per_image = max_dets_per_image self._cpu_device = torch.device("cpu") self._metadata = MetadataCatalog.get(dataset_name) json_file = PathManager.get_local_path(self._metadata.json_file) self._lvis_api = LVIS(json_file) # Test set json files do not contain annotations (evaluation must be # performed using the LVIS evaluation server). self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0
[docs] def reset(self): self._predictions = []
[docs] def process(self, inputs, outputs): """ Args: inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). It is a list of dict. Each dict corresponds to an image and contains keys like "height", "width", "file_name", "image_id". outputs: the outputs of a LVIS model. It is a list of dicts with key "instances" that contains :class:`Instances`. """ for input, output in zip(inputs, outputs): prediction = {"image_id": input["image_id"]} if "instances" in output: instances = output["instances"].to(self._cpu_device) prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) if "proposals" in output: prediction["proposals"] = output["proposals"].to(self._cpu_device) self._predictions.append(prediction)
[docs] def evaluate(self): if self._distributed: comm.synchronize() predictions = comm.gather(self._predictions, dst=0) predictions = list(itertools.chain(*predictions)) if not comm.is_main_process(): return else: predictions = self._predictions if len(predictions) == 0: self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") return {} if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with, "wb") as f:, f) self._results = OrderedDict() if "proposals" in predictions[0]: self._eval_box_proposals(predictions) if "instances" in predictions[0]: self._eval_predictions(predictions) # Copy so the caller can do whatever with results return copy.deepcopy(self._results)
def _tasks_from_predictions(self, predictions): for pred in predictions: if "segmentation" in pred: return ("bbox", "segm") return ("bbox",) def _eval_predictions(self, predictions): """ Evaluate predictions. Fill self._results with the metrics of the tasks. Args: predictions (list[dict]): list of outputs from the model """"Preparing results in the LVIS format ...") lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) tasks = self._tasks or self._tasks_from_predictions(lvis_results) # LVIS evaluator can be used to evaluate results for COCO dataset categories. # In this case `_metadata` variable will have a field with COCO-specific category mapping. if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): reverse_id_mapping = { v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() } for result in lvis_results: result["category_id"] = reverse_id_mapping[result["category_id"]] else: # unmap the category ids for LVIS (from 0-indexed to 1-indexed) for result in lvis_results: result["category_id"] += 1 if self._output_dir: file_path = os.path.join(self._output_dir, "lvis_instances_results.json")"Saving results to {}".format(file_path)) with, "w") as f: f.write(json.dumps(lvis_results)) f.flush() if not self._do_evaluation:"Annotations are not available for evaluation.") return"Evaluating predictions ...") for task in sorted(tasks): res = _evaluate_predictions_on_lvis( self._lvis_api, lvis_results, task, max_dets_per_image=self._max_dets_per_image, class_names=self._metadata.get("thing_classes"), ) self._results[task] = res def _eval_box_proposals(self, predictions): """ Evaluate the box proposals in predictions. Fill self._results with the metrics for "box_proposals" task. """ if self._output_dir: # Saving generated box proposals to file. # Predicted box_proposals are in XYXY_ABS mode. bbox_mode = BoxMode.XYXY_ABS.value ids, boxes, objectness_logits = [], [], [] for prediction in predictions: ids.append(prediction["image_id"]) boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) proposal_data = { "boxes": boxes, "objectness_logits": objectness_logits, "ids": ids, "bbox_mode": bbox_mode, } with, "box_proposals.pkl"), "wb") as f: pickle.dump(proposal_data, f) if not self._do_evaluation:"Annotations are not available for evaluation.") return"Evaluating bbox proposals ...") res = {} areas = {"all": "", "small": "s", "medium": "m", "large": "l"} for limit in [100, 1000]: for area, suffix in areas.items(): stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) key = "AR{}@{:d}".format(suffix, limit) res[key] = float(stats["ar"].item() * 100)"Proposal metrics: \n" + create_small_table(res)) self._results["box_proposals"] = res
# inspired from Detectron: # # noqa def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): """ Evaluate detection proposal recall metrics. This function is a much faster alternative to the official LVIS API recall evaluation code. However, it produces slightly different results. """ # Record max overlap value for each gt box # Return vector of overlap values areas = { "all": 0, "small": 1, "medium": 2, "large": 3, "96-128": 4, "128-256": 5, "256-512": 6, "512-inf": 7, } area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2], ] # 512-inf assert area in areas, "Unknown area range: {}".format(area) area_range = area_ranges[areas[area]] gt_overlaps = [] num_pos = 0 for prediction_dict in dataset_predictions: predictions = prediction_dict["proposals"] # sort predictions in descending order # TODO maybe remove this and make it explicit in the documentation inds = predictions.objectness_logits.sort(descending=True)[1] predictions = predictions[inds] ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) anno = lvis_api.load_anns(ann_ids) gt_boxes = [ BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno ] gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes gt_boxes = Boxes(gt_boxes) gt_areas = torch.as_tensor([obj["area"] for obj in anno]) if len(gt_boxes) == 0 or len(predictions) == 0: continue valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) gt_boxes = gt_boxes[valid_gt_inds] num_pos += len(gt_boxes) if len(gt_boxes) == 0: continue if limit is not None and len(predictions) > limit: predictions = predictions[:limit] overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) _gt_overlaps = torch.zeros(len(gt_boxes)) for j in range(min(len(predictions), len(gt_boxes))): # find which proposal box maximally covers each gt box # and get the iou amount of coverage for each gt box max_overlaps, argmax_overlaps = overlaps.max(dim=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ovr, gt_ind = max_overlaps.max(dim=0) assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps.append(_gt_overlaps) gt_overlaps = (, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) ) gt_overlaps, _ = torch.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) recalls = torch.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return { "ar": ar, "recalls": recalls, "thresholds": thresholds, "gt_overlaps": gt_overlaps, "num_pos": num_pos, } def _evaluate_predictions_on_lvis( lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None ): """ Args: iou_type (str): max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP This limit, by default of the LVIS dataset, is 300. class_names (None or list[str]): if provided, will use it to predict per-category AP. Returns: a dict of {metric name: score} """ metrics = { "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], }[iou_type] logger = logging.getLogger(__name__) if len(lvis_results) == 0: # TODO: check if needed logger.warn("No predictions from the model!") return {metric: float("nan") for metric in metrics} if iou_type == "segm": lvis_results = copy.deepcopy(lvis_results) # When evaluating mask AP, if the results contain bbox, LVIS API will # use the box area as the area of the instance, instead of the mask area. # This leads to a different definition of small/medium/large. # We remove the bbox field to let mask AP use mask area. for c in lvis_results: c.pop("bbox", None) if max_dets_per_image is None: max_dets_per_image = 300 # Default for LVIS dataset from lvis import LVISEval, LVISResults"Evaluating with max detections per image = {max_dets_per_image}") lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image) lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) lvis_eval.print_results() # Pull the standard metrics from the LVIS results results = lvis_eval.get_results() results = {metric: float(results[metric] * 100) for metric in metrics}"Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) return results