11. Benchmarking Pose detectors using data#
written by Tiankang Xie
In this tutorial we will demonstrate how to evaluate py-feat's head pose detection algorithms with benchmark evaluation data.
import glob
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageDraw
from scipy.spatial.transform import Rotation
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from feat import Detector
# Extract pose information with the Img2Pose detector
from feat.facepose_detectors.img2pose.img2pose_test import Img2Pose
def convert_to_aflw(rotvec, is_rotvec=True):
    """Convert a rotation vector or rotation matrix to AFLW-style pitch, yaw, roll angles (in degrees)."""
    if is_rotvec:
        rotvec = Rotation.from_rotvec(rotvec).as_matrix()
    rot_mat_2 = np.transpose(rotvec)
    angle = Rotation.from_matrix(rot_mat_2).as_euler('xyz', degrees=True)
    return np.array([angle[0], -angle[1], -angle[2]])
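As a quick sanity check, the identity rotation should map to (approximately) zero pitch, yaw, and roll. The snippet below is only an illustration and is not part of the benchmark itself.

# Hypothetical sanity check: the identity rotation matrix corresponds to a neutral head pose.
print(convert_to_aflw(np.eye(3), is_rotvec=False))  # roughly [0. 0. 0.]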
class GenericImageDataset(Dataset):
    """Generic image dataset loader for PyTorch."""

    def __init__(self, file_paths, transform=None):
        """
        Args:
            file_paths (list): Paths to the image files to load.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.filePaths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.filePaths)

    def __getitem__(self, idx):
        img = Image.open(self.filePaths[idx])
        if self.transform:
            img = self.transform(img)
        return img
Provide the path to the data. The BIWI Kinect Head Pose Database can be downloaded from https://www.kaggle.com/datasets/kmader/biwi-kinect-head-pose-database
data_dir = '/Storage/Data/Kinect_Pose/hpdb/'
all_imgs_paths = glob.glob(data_dir+'**/*.png')
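As an optional sanity check (an assumption based on the file layout used below, not part of the original tutorial), every "_rgb.png" frame should have a matching "_pose.txt" ground-truth file.

# Check that every RGB frame has a corresponding pose annotation file.
print(f"Found {len(all_imgs_paths)} images")
missing = [p for p in all_imgs_paths if not os.path.exists(p.replace("_rgb.png", "_pose.txt"))]
print(f"{len(missing)} images are missing pose annotations")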
Unconstrained#
imclassifier = Img2Pose(constrained=False, detection_threshold=0.25, rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_test=1000)

img_trans = transforms.Compose([
    transforms.ToTensor()
])

dataset = GenericImageDataset(all_imgs_paths, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
    preds = imclassifier(sample_batched)
    all_pred_vals.append(preds[1])  # keep only the pose predictions
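Each entry of all_pred_vals holds the pose estimates for one image, and the evaluation loop further down assumes one [pitch, yaw, roll] triplet per detected face. It can be worth inspecting one prediction before continuing (a small illustrative check, not part of the original tutorial):

# Inspect the structure of the first prediction.
example = all_pred_vals[0]
print(len(example[0]))         # number of faces detected in the first image
if len(example[0]) > 0:
    print(example[0][0][0])    # estimated pose triplet for the first face (compared against ground truth in degrees below)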
pose_targets = []
for img_path in all_imgs_paths:
    # Read the 3x3 rotation matrix stored in the first three lines of the pose annotation file
    with open(img_path.replace("_rgb.png", "_pose.txt")) as annotations:
        lines = annotations.readlines()
    pose_target = []
    for i in range(3):
        lines[i] = str(lines[i].rstrip("\n"))
        pose_target.append(lines[i].split(" ")[:3])
    pose_target = np.asarray(pose_target).astype(float)
    pose_target = convert_to_aflw(pose_target, False)
    pose_targets.append(pose_target)

pose_target_arr = np.asarray(pose_targets)
pose_target_arr[:, [1, 2]] = pose_target_arr[:, [2, 1]]  # swap the last two angle columns to match the predicted pose order
new_arr = []
invalids = []  # indices of images with at least one detection (despite the name)
for i, arrr1 in enumerate(tqdm(all_pred_vals)):
    if len(arrr1[0]) > 0:
        if len(arrr1[0]) == 1:
            new_arr.append(arrr1[0][0][0])
            invalids.append(i)
        else:
            # More than one face detected: keep the prediction closest to the ground truth
            vv = []
            for part_arr in arrr1[0]:
                val = np.sum(abs(part_arr - pose_target_arr[i]))
                vv.append(val)
            argmin = np.argmin(vv)
            new_arr.append(arrr1[0][argmin][0])
            invalids.append(i)
    else:
        # No face detected in this image: skip it
        continue

new_arr = np.vstack(new_arr)
pose_ae = abs(new_arr - pose_target_arr[invalids, :])
pose_ae_summed = np.sum(pose_ae, axis=1)
neg_index = np.where(pose_ae_summed > 100)
100%|██████████| 15678/15678 [05:50<00:00, 44.78it/s]
100%|██████████| 15678/15678 [00:00<00:00, 183154.93it/s]
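neg_index flags frames whose summed absolute error exceeds 100 degrees. A short, illustrative way to pull those frames out for manual inspection (not part of the original tutorial) would be:

# Map the outlier indices back to file paths for manual inspection.
outlier_paths = [all_imgs_paths[invalids[j]] for j in neg_index[0]]
print(f"{len(outlier_paths)} frames exceed 100 degrees of summed absolute error")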
print('pitch, yaw, roll error in degrees:')
print(np.mean(pose_ae, 0))
print('grand mean error in degrees:')
print(np.mean(pose_ae))
pitch, yaw, roll error in degrees:
[6.25469727 3.38062612 4.54291059]
grand mean error in degrees:
4.726077992404776
Constrained#
imclassifier = Img2Pose(constrained=True, detection_threshold=0.25, rpn_pre_nms_top_n_test=2000, rpn_post_nms_top_n_test=200)

img_trans = transforms.Compose([
    transforms.ToTensor()
])

dataset = GenericImageDataset(all_imgs_paths, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
    preds = imclassifier(sample_batched)
    all_pred_vals.append(preds[1])
pose_targets = []
for img_path in all_imgs_paths:
    with open(img_path.replace("_rgb.png", "_pose.txt")) as annotations:
        lines = annotations.readlines()
    pose_target = []
    for i in range(3):
        lines[i] = str(lines[i].rstrip("\n"))
        pose_target.append(lines[i].split(" ")[:3])
    pose_target = np.asarray(pose_target).astype(float)
    pose_target = convert_to_aflw(pose_target, False)
    pose_targets.append(pose_target)

pose_target_arr = np.asarray(pose_targets)
pose_target_arr[:, [1, 2]] = pose_target_arr[:, [2, 1]]
new_arr = []
invalids = []
for i, arrr1 in enumerate(tqdm(all_pred_vals)):
    if len(arrr1[0]) > 0:
        if len(arrr1[0]) == 1:
            new_arr.append(arrr1[0][0][0])
            invalids.append(i)
        else:
            vv = []
            for part_arr in arrr1[0]:
                val = np.sum(abs(part_arr - pose_target_arr[i]))
                vv.append(val)
            argmin = np.argmin(vv)
            new_arr.append(arrr1[0][argmin][0])
            invalids.append(i)
    else:
        continue

new_arr = np.vstack(new_arr)
pose_ae = abs(new_arr - pose_target_arr[invalids, :])
pose_ae_summed = np.sum(pose_ae, axis=1)
neg_index = np.where(pose_ae_summed > 100)
100%|██████████| 15678/15678 [04:39<00:00, 56.12it/s]
100%|██████████| 15678/15678 [00:00<00:00, 123273.54it/s]
print('pitch, yaw, roll error in degrees:')
print(np.mean(pose_ae, 0))
print('grand mean error in degrees:')
print(np.mean(pose_ae))
pitch, yaw, roll error in degrees:
[4.56942555 3.39055895 4.53926361]
grand mean error in degrees:
4.16641603923617