9. Benchmarking Bounding Box Detection Using Evaluation Data#
Written by Tiankang Xie
In this tutorial we will demonstrate how to evaluate py-feat's bounding box (face detection) algorithms with evaluation data.
import pickle
import numpy as np
import os
from scipy.io import loadmat
from tqdm import tqdm
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import pandas as pd
import torchvision.transforms as transforms
import torch
import glob
from feat import Detector
from feat.facepose_detectors.img2pose.img2pose_test import Img2Pose
The benchmark script was originally provided by the WIDER FACE authors in MATLAB. We borrow code from https://github.com/wondervictor/WiderFace-Evaluation, which is a Python reimplementation of the original MATLAB code.
# Copyright (c) OpenMMLab. All rights reserved.
def voc_ap(rec, prec):
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def dataset_pr_info(thresh_num, pr_curve, count_face):
_pr_curve = np.zeros((thresh_num, 2))
for i in range(thresh_num):
_pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0]
_pr_curve[i, 1] = pr_curve[i, 1] / count_face
return _pr_curve
def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall):
pr_info = np.zeros((thresh_num, 2)).astype('float')
for t in range(thresh_num):
thresh = 1 - (t+1)/thresh_num
r_index = np.where(pred_info[:, 4] >= thresh)[0]
if len(r_index) == 0:
pr_info[t, 0] = 0
pr_info[t, 1] = 0
else:
r_index = r_index[-1]
p_index = np.where(proposal_list[:r_index+1] == 1)[0]
pr_info[t, 0] = len(p_index)
pr_info[t, 1] = pred_recall[r_index]
return pr_info
def bbox_overlaps(bboxes1,
bboxes2,
mode='iou',
eps=1e-6,
use_legacy_coordinate=False):
"""Calculate the ious between each bbox of bboxes1 and bboxes2.
Args:
bboxes1 (ndarray): Shape (n, 4)
bboxes2 (ndarray): Shape (k, 4)
mode (str): IOU (intersection over union) or IOF (intersection
over foreground)
use_legacy_coordinate (bool): Whether to use coordinate system in
mmdet v1.x, which means width and height should be
calculated as 'x2 - x1 + 1' and 'y2 - y1 + 1' respectively.
Note that when the function is used in `VOCDataset`, it should be
True to align with the official implementation
`http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar`
Default: False.
Returns:
ious (ndarray): Shape (n, k)
"""
assert mode in ['iou', 'iof']
if not use_legacy_coordinate:
extra_length = 0.
else:
extra_length = 1.
bboxes1 = bboxes1.astype(np.float32)
bboxes2 = bboxes2.astype(np.float32)
rows = bboxes1.shape[0]
cols = bboxes2.shape[0]
ious = np.zeros((rows, cols), dtype=np.float32)
if rows * cols == 0:
return ious
exchange = False
if bboxes1.shape[0] > bboxes2.shape[0]:
bboxes1, bboxes2 = bboxes2, bboxes1
ious = np.zeros((cols, rows), dtype=np.float32)
exchange = True
area1 = (bboxes1[:, 2] - bboxes1[:, 0] + extra_length) * (
bboxes1[:, 3] - bboxes1[:, 1] + extra_length)
area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
bboxes2[:, 3] - bboxes2[:, 1] + extra_length)
for i in range(bboxes1.shape[0]):
x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
overlap = np.maximum(x_end - x_start + extra_length, 0) * np.maximum(
y_end - y_start + extra_length, 0)
if mode == 'iou':
union = area1[i] + area2 - overlap
else:
union = area1[i] if not exchange else area2
union = np.maximum(union, eps)
ious[i, :] = overlap / union
if exchange:
ious = ious.T
return ious
def image_eval(pred, gt, ignore, iou_thresh):
""" single image evaluation
pred: Nx5
gt: Nx4
ignore: array with one entry per gt box; 1 = evaluate this box, 0 = ignore it
"""
_pred = pred.copy()
_gt = gt.copy()
pred_recall = np.zeros(_pred.shape[0])
recall_list = np.zeros(_gt.shape[0])
proposal_list = np.ones(_pred.shape[0])
# _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
# _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
_gt[:, 2] = _gt[:, 2] + _gt[:, 0]
_gt[:, 3] = _gt[:, 3] + _gt[:, 1]
overlaps = bbox_overlaps(_pred[:, :4], _gt)
for h in range(_pred.shape[0]):
gt_overlap = overlaps[h]
max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
if max_overlap >= iou_thresh:
if ignore[max_idx] == 0:
recall_list[max_idx] = -1
proposal_list[h] = -1
elif recall_list[max_idx] == 0:
recall_list[max_idx] = 1
r_keep_index = np.where(recall_list == 1)[0]
pred_recall[h] = len(r_keep_index)
return pred_recall, proposal_list
def get_gt_boxes(gt_dir):
""" gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)"""
gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat'))
hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat'))
medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat'))
easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat'))
facebox_list = gt_mat['face_bbx_list']
event_list = gt_mat['event_list']
file_list = gt_mat['file_list']
hard_gt_list = hard_mat['gt_list']
medium_gt_list = medium_mat['gt_list']
easy_gt_list = easy_mat['gt_list']
return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list
def load_preds(pred_dir='/Storage/Projects/pyfeat_testing/Data/WIDER_BBOX_IMG2POSE/preds.pkl'):
with open(pred_dir, 'rb') as fp:
all_imgs, all_pred_vals = pickle.load(fp)
boxes = dict()
for i, img_name in enumerate(all_imgs):
event_name = [mai for mai in img_name.split('/') if '--' in mai][0]
if event_name not in boxes:
boxes[event_name] = {}
pred_box = np.array(all_pred_vals[i])
boxes[event_name][os.path.basename(img_name).rstrip('.jpg')] = pred_box
return boxes
def norm_score(pred):
""" norm score
pred {key: [[x1,y1,x2,y2,s]]}
"""
max_score = 0
min_score = 1
for _, k in pred.items():
for _, v in k.items():
if len(v.shape) == 0 or len(v) == 0:
continue
_min = np.min(v[:, -1])
_max = np.max(v[:, -1])
max_score = max(_max, max_score)
min_score = min(_min, min_score)
diff = max_score - min_score
for _, k in pred.items():
for _, v in k.items():
if len(v.shape) == 0 or len(v) == 0:
continue
v[:, -1] = (v[:, -1] - min_score)/diff
def print_ap_scores(result_fp):
iou_thresh=0.5
pred = load_preds(pred_dir=result_fp) # Where you save the result in the extract_bbox_img2Pose.py script
norm_score(pred)
facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_dir='/Storage/Data/wider/wider_face_split/') # Where ground-truth are stored
event_num = len(event_list)
thresh_num = 1000
settings = ['easy', 'medium', 'hard']
setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list]
aps = []
for setting_id in range(3):
# different setting
gt_list = setting_gts[setting_id]
count_face = 0
pr_curve = np.zeros((thresh_num, 2)).astype('float')
# settings order: [easy, medium, hard]
pbar = tqdm(range(event_num))
for i in pbar:
pbar.set_description('Processing {}'.format(settings[setting_id]))
event_name = str(event_list[i][0][0])
img_list = file_list[i][0]
pred_list = pred[event_name]
sub_gt_list = gt_list[i][0]
# img_pr_info_list = np.zeros((len(img_list), thresh_num, 2))
gt_bbx_list = facebox_list[i][0]
for j in range(len(img_list)):
pred_info = pred_list[str(img_list[j][0][0])]
gt_boxes = gt_bbx_list[j][0].astype('float')
keep_index = sub_gt_list[j][0]
count_face += len(keep_index)
if len(gt_boxes) == 0 or len(pred_info.shape) == 0 or len(pred_info) == 0:
continue
ignore = np.zeros(gt_boxes.shape[0])
if len(keep_index) != 0:
ignore[keep_index-1] = 1
pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh)
_img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall)
pr_curve += _img_pr_info
pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face)
propose = pr_curve[:, 0]
recall = pr_curve[:, 1]
ap = voc_ap(recall, propose)
aps.append(ap)
print("==================== Results ====================")
print("Easy Val AP: {}".format(aps[0]))
print("Medium Val AP: {}".format(aps[1]))
print("Hard Val AP: {}".format(aps[2]))
print("=================================================")
return aps
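To make the evaluation helpers above more concrete, here is a quick sanity check of bbox_overlaps and voc_ap on made-up values (illustrative only; it assumes the cells above have been run):
# Illustrative sanity check of the evaluation helpers (made-up boxes and scores)
toy_gt = np.array([[10., 10., 20., 20.]])          # one ground-truth box in x1, y1, x2, y2 format
toy_pred = np.array([[12., 12., 22., 22., 0.9]])   # one prediction; last column is the confidence score
print(bbox_overlaps(toy_pred[:, :4], toy_gt))      # IoU = 64 / 136, roughly 0.47
# AP of a toy two-point precision-recall curve: precision 1.0 up to recall 0.5, then 0.5
print(voc_ap(rec=np.array([0.5, 1.0]), prec=np.array([1.0, 0.5])))  # 0.5*1.0 + 0.5*0.5 = 0.75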
We provide the path to the WIDER dataset, the ground truth labels, and the directory where the results will be saved. The dataset and ground truth labels can be downloaded at http://shuoyang1213.me/WIDERFACE/
data_dir = '/Storage/Data/wider/'
true_result_dir = '/Storage/Data/wider/wider_face_split/'
save_result_dir = '/Storage/Projects/pyfeat_testing/Data_Eshin/facebox_test/'
all_imgs = glob.glob(data_dir+'WIDER_val/images/**/*.jpg')
1. Test of FaceBoxes#
detector = Detector(face_model='faceboxes',emotion_model='resmasknet', landmark_model='mobilefacenet', au_model='xgb', device='cpu')
all_pred_vals = []
for img in tqdm(all_imgs):
im1 = Image.open(img)
face_aus = detector.detect_faces(im1)
all_pred_vals.append(face_aus[0])
with open(save_result_dir+'FaceBoxes_bench_results.pkl', 'wb') as fp:
pickle.dump((all_imgs, all_pred_vals), fp)
100%|██████████| 3226/3226 [02:32<00:00, 21.11it/s]
facebox_normal = print_ap_scores(result_fp=save_result_dir+'FaceBoxes_bench_results.pkl')
Processing easy: 100%|██████████| 61/61 [00:11<00:00, 5.33it/s]
Processing medium: 100%|██████████| 61/61 [00:11<00:00, 5.33it/s]
Processing hard: 100%|██████████| 61/61 [00:11<00:00, 5.31it/s]
==================== Results ====================
Easy Val AP: 0.5368750176845414
Medium Val AP: 0.34812514764839486
Hard Val AP: 0.14662014664396028
=================================================
2. Test of MTCNN#
detector = Detector(face_model='mtcnn',emotion_model='resmasknet', landmark_model='mobilefacenet', au_model='xgb', device='cpu')
all_pred_vals = []
for img in tqdm(all_imgs):
im1 = Image.open(img)
face_aus = detector.detect_faces(im1)
all_pred_vals.append(face_aus[0])
with open(save_result_dir+'MTCNN_bench_results.pkl', 'wb') as fp:
pickle.dump((all_imgs, all_pred_vals), fp)
/home/tiankang/anaconda3/envs/py39/lib/python3.9/site-packages/torchvision/models/_utils.py:135: UserWarning: Using 'backbone_name' as positional parameter(s) is deprecated since 0.13 and may be removed in the future. Please use keyword parameter(s) instead.
warnings.warn(
/home/tiankang/anaconda3/envs/py39/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
100%|██████████| 3226/3226 [07:13<00:00, 7.44it/s]
mtcnn_normal = print_ap_scores(result_fp=save_result_dir+'MTCNN_bench_results.pkl')
Processing easy: 100%|██████████| 61/61 [00:16<00:00, 3.77it/s]
Processing medium: 100%|██████████| 61/61 [00:16<00:00, 3.75it/s]
Processing hard: 100%|██████████| 61/61 [00:16<00:00, 3.75it/s]
==================== Results ====================
Easy Val AP: 0.7248933447919402
Medium Val AP: 0.7175922904388756
Hard Val AP: 0.47326227608164284
=================================================
3. Test of RetinaFace#
detector = Detector(face_model='retinaface',emotion_model='resmasknet', landmark_model='mobilefacenet', au_model='xgb', device='cpu')
all_pred_vals = []
for img in tqdm(all_imgs):
im1 = Image.open(img)
face_aus = detector.detect_faces(im1)
all_pred_vals.append(face_aus[0])
with open(save_result_dir+'RetinaFace_bench_results.pkl', 'wb') as fp:
pickle.dump((all_imgs, all_pred_vals), fp)
/home/tiankang/anaconda3/envs/py39/lib/python3.9/site-packages/torchvision/models/_utils.py:135: UserWarning: Using 'backbone_name' as positional parameter(s) is deprecated since 0.13 and may be removed in the future. Please use keyword parameter(s) instead.
warnings.warn(
/home/tiankang/anaconda3/envs/py39/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
6%|▌ | 178/3226 [00:22<05:13, 9.72it/s]
retinaface_normal = print_ap_scores(result_fp=save_result_dir+'RetinaFace_bench_results.pkl')
4. Test of Img2Pose unconstrained#
Img2Pose models are heavy both in architecture and in number of hyperparameters. We advise using different parameter combinations for different settings, especially for the constrained vs. unconstrained models#
from feat.facepose_detectors.img2pose.img2pose_test import Img2Pose
from torch.utils.data import Dataset, DataLoader
class GenericImageDataset(Dataset):
"""Face Landmarks dataset."""
def __init__(self, file_paths, transform=None):
"""
Args:
file_paths (list): List of paths to the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.filePaths = file_paths
self.transform = transform
def __len__(self):
return len(self.filePaths)
def __getitem__(self, idx):
img = Image.open(self.filePaths[idx])
if self.transform:
img = self.transform(img)
return img
imclassifier = Img2Pose(constrained=False, detection_threshold=0.25, rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_test=1000)
img_trans = transforms.Compose([
transforms.ToTensor()
])
dataset = GenericImageDataset(all_imgs, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
preds = imclassifier(sample_batched)
all_pred_vals.append(preds[0][0]) # Append Face Bounding Box
# Save Result
with open(save_result_dir+'Img2poseuncon_bench_results.pkl', 'wb') as fp:
pickle.dump((all_imgs, all_pred_vals), fp)
100%|██████████| 3226/3226 [01:48<00:00, 29.82it/s]
img2pose_uncon_normal = print_ap_scores(result_fp=save_result_dir+'Img2poseuncon_bench_results.pkl')
Processing easy: 100%|██████████| 61/61 [00:19<00:00, 3.18it/s]
Processing medium: 100%|██████████| 61/61 [00:19<00:00, 3.20it/s]
Processing hard: 100%|██████████| 61/61 [00:18<00:00, 3.21it/s]
==================== Results ====================
Easy Val AP: 0.8563027227316843
Medium Val AP: 0.8136765059086696
Hard Val AP: 0.5739961257745989
=================================================
5. Test of Img2Pose constrained#
imclassifier = Img2Pose(constrained=True, detection_threshold=0.25, rpn_pre_nms_top_n_test=2000, rpn_post_nms_top_n_test=200)
img_trans = transforms.Compose([
transforms.ToTensor()
])
dataset = GenericImageDataset(all_imgs, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
preds = imclassifier(sample_batched)
all_pred_vals.append(preds[0][0]) # Append Face Bounding Box
# Save Result
with open(save_result_dir+'Img2posecon_bench_results.pkl', 'wb') as fp:
pickle.dump((all_imgs, all_pred_vals), fp)
100%|██████████| 3226/3226 [01:13<00:00, 43.72it/s]
img2pose_con_normal = print_ap_scores(result_fp=save_result_dir+'Img2posecon_bench_results.pkl')
Processing easy: 100%|██████████| 61/61 [00:16<00:00, 3.60it/s]
Processing medium: 100%|██████████| 61/61 [00:17<00:00, 3.56it/s]
Processing hard: 100%|██████████| 61/61 [00:17<00:00, 3.58it/s]
==================== Results ====================
Easy Val AP: 0.6470359115773076
Medium Val AP: 0.5878560483278932
Hard Val AP: 0.32415904798673495
=================================================