11. Benchmarking pose detectors with evaluation data#

Written by Tiankang Xie

In this tutorial we demonstrate how to benchmark py-feat's img2pose head pose detection algorithms against ground-truth annotations, using the BIWI Kinect Head Pose Database as evaluation data and reporting the mean absolute error (in degrees) for pitch, yaw, and roll.

import glob
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageDraw
from scipy.spatial.transform import Rotation
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from feat import Detector
# Img2Pose performs face detection and head pose estimation in a single model
from feat.facepose_detectors.img2pose.img2pose_test import Img2Pose

def convert_to_aflw(rotvec, is_rotvec=True):
    """Convert a rotation vector or rotation matrix to AFLW-convention
    Euler angles (pitch, yaw, roll) in degrees."""
    if is_rotvec:
        rotvec = Rotation.from_rotvec(rotvec).as_matrix()
    # Transpose (i.e., invert) the rotation, then flip the signs of the
    # second and third angles to match the AFLW convention used by img2pose.
    rot_mat_2 = np.transpose(rotvec)
    angle = Rotation.from_matrix(rot_mat_2).as_euler('xyz', degrees=True)
    return np.array([angle[0], -angle[1], -angle[2]])
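
As a quick illustrative check (not part of the original tutorial), a rotation of 0.1 rad (about 5.73 degrees) around the x-axis should come back with its sign flipped by the transpose, with yaw and roll staying at zero:

# Illustrative sanity check for convert_to_aflw.
print(convert_to_aflw(np.array([0.1, 0.0, 0.0])))
# -> approximately [-5.73  0. -0.]
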
class GenericImageDataset(Dataset):
    """Generic image dataset loader for PyTorch."""

    def __init__(self, file_paths, transform=None):
        """
        Args:
            file_paths (list): Paths to the images to load.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.filePaths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.filePaths)

    def __getitem__(self, idx):
        img = Image.open(self.filePaths[idx])
        if self.transform:
            img = self.transform(img)
        return img

Provide the path to the data. The BIWI Kinect Head Pose Database can be downloaded from https://www.kaggle.com/datasets/kmader/biwi-kinect-head-pose-database

data_dir = '/Storage/Data/Kinect_Pose/hpdb/'
all_imgs_paths = glob.glob(data_dir+'**/*.png')
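
Each frame frame_XXXXX_rgb.png is paired with a frame_XXXXX_pose.txt annotation whose first three lines hold the head's 3x3 rotation matrix. As an optional check (not in the original tutorial), you can verify the pairing before running the benchmark:

# Optional sanity check: every RGB frame should have a pose annotation.
missing = [p for p in all_imgs_paths
           if not os.path.exists(p.replace("_rgb.png", "_pose.txt"))]
print(f"{len(all_imgs_paths)} images found, {len(missing)} without annotations")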

Unconstrained#
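
img2pose comes in two variants: an unconstrained model trained to handle the full range of head rotations, and a constrained variant fine-tuned for a narrower, more frontal pose range. We benchmark the unconstrained model first.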

# Load the unconstrained img2pose model.
imclassifier = Img2Pose(constrained=False, detection_threshold=0.25, rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_test=1000)

img_trans = transforms.Compose([
    transforms.ToTensor()
])

dataset = GenericImageDataset(all_imgs_paths, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
    preds = imclassifier(sample_batched)
    all_pred_vals.append(preds[1])  # second element holds the pose estimates

pose_targets = []
for img_path in all_imgs_paths:
    # The first three lines of each *_pose.txt file hold the head's 3x3
    # rotation matrix.
    with open(img_path.replace("_rgb.png", "_pose.txt")) as annotations:
        lines = annotations.readlines()

    pose_target = []
    for i in range(3):
        pose_target.append(lines[i].rstrip("\n").split(" ")[:3])
    pose_target = np.asarray(pose_target).astype(float)
    pose_target = convert_to_aflw(pose_target, False)
    pose_targets.append(pose_target)
pose_target_arr = np.asarray(pose_targets)
# Swap the last two columns so the target ordering matches the ordering of
# the img2pose predictions.
pose_target_arr[:, [1, 2]] = pose_target_arr[:, [2, 1]]

new_arr = []
valid_idx = []  # indices of frames with at least one detected face
for i, arrr1 in enumerate(tqdm(all_pred_vals)):
    if len(arrr1[0]) == 0:
        # No face detected in this frame; skip it.
        continue
    if len(arrr1[0]) == 1:
        new_arr.append(arrr1[0][0][0])
        valid_idx.append(i)
    else:
        # Multiple detections: BIWI frames contain a single subject, so keep
        # the detection whose pose is closest to the ground truth.
        vv = []
        for part_arr in arrr1[0]:
            val = np.sum(abs(part_arr - pose_target_arr[i]))
            vv.append(val)
        argmin = np.argmin(vv)
        new_arr.append(arrr1[0][argmin][0])
        valid_idx.append(i)
new_arr = np.vstack(new_arr)

# Absolute error per axis; frames with a summed error above 100 degrees are
# flagged as likely spurious detections.
pose_ae = abs(new_arr - pose_target_arr[valid_idx, :])
pose_ae_summed = np.sum(pose_ae, axis=1)
neg_index = np.where(pose_ae_summed > 100)
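
neg_index flags these likely failure cases. As an optional extra (not part of the original tutorial), you can map the flagged rows back to image paths to inspect them:

# Optional: list the frames behind the largest errors.
for row in neg_index[0]:
    print(all_imgs_paths[valid_idx[row]], pose_ae[row])
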
print('pitch, yaw, roll error in degrees:')
print(np.mean(pose_ae, 0))
print('grand mean error in degrees:')
print(np.mean(pose_ae))
pitch, yaw, roll error in degrees:
[6.25469727 3.38062612 4.54291059]
grand mean error in degrees:
4.726077992404776

Constrained#

# Load the constrained img2pose model.
imclassifier = Img2Pose(constrained=True, detection_threshold=0.25, rpn_pre_nms_top_n_test=2000, rpn_post_nms_top_n_test=200)

img_trans = transforms.Compose([
    transforms.ToTensor()
])

dataset = GenericImageDataset(all_imgs_paths, transform=img_trans)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

all_pred_vals = []
for i_batch, sample_batched in enumerate(tqdm(dataloader)):
    preds = imclassifier(sample_batched)
    all_pred_vals.append(preds[1])

pose_targets = []
for img_path in all_imgs_paths:
    with open(img_path.replace("_rgb.png", "_pose.txt")) as annotations:
        lines = annotations.readlines()

    pose_target = []
    for i in range(3):
        pose_target.append(lines[i].rstrip("\n").split(" ")[:3])
    pose_target = np.asarray(pose_target).astype(float)
    pose_target = convert_to_aflw(pose_target, False)
    pose_targets.append(pose_target)
pose_target_arr = np.asarray(pose_targets)
pose_target_arr[:, [1, 2]] = pose_target_arr[:, [2, 1]]

new_arr = []
valid_idx = []
for i, arrr1 in enumerate(tqdm(all_pred_vals)):
    if len(arrr1[0]) == 0:
        continue
    if len(arrr1[0]) == 1:
        new_arr.append(arrr1[0][0][0])
        valid_idx.append(i)
    else:
        vv = []
        for part_arr in arrr1[0]:
            val = np.sum(abs(part_arr - pose_target_arr[i]))
            vv.append(val)
        argmin = np.argmin(vv)
        new_arr.append(arrr1[0][argmin][0])
        valid_idx.append(i)
new_arr = np.vstack(new_arr)

pose_ae = abs(new_arr - pose_target_arr[valid_idx, :])
pose_ae_summed = np.sum(pose_ae, axis=1)
neg_index = np.where(pose_ae_summed > 100)
print('pitch, yaw, roll error in degrees:')
print(np.mean(pose_ae, 0))
print('grand mean error in degrees:')
print(np.mean(pose_ae))
pitch, yaw, roll error in degrees:
[4.56942555 3.39055895 4.53926361]
grand mean error in degrees:
4.16641603923617
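
Comparing the two variants, the constrained model comes out ahead overall (grand mean error of 4.17 degrees versus 4.73 for the unconstrained model), with the improvement driven almost entirely by pitch (4.57 versus 6.25 degrees); yaw and roll errors are essentially identical.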