7. Example labels and landmark dataset loading
Written by Eshin Jolly
This notebook demonstrates how we extracted labels and landmarks from the EmotioNet dataset. You'll want to repeat this process for the DISFA Plus and BP4D datasets prior to training the AU visualization model.
Imports, paths, and helper functions
Make sure to adjust the data paths as needed. By default this notebook assumes the datasets live in the data/datasets folder at the root of this repository.
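For reference, the cells below assume a layout like the following for EmotioNet (the folder and file names come from the cells further down); the DISFA Plus and BP4D folders would sit alongside it:

data/datasets/
└── EmotionNet/
    ├── imgs/          # *.jpg images
    └── labels/
        └── EmotioNet_FACS_aws_2020_24600.csv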
import csv
import glob
import math
import os

import cv2
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
from scipy.spatial import ConvexHull
from skimage.feature import hog
from skimage.measure import grid_points_in_poly  # public home; originally imported from skimage.morphology.convex_hull
from tqdm import tqdm

from feat import Detector
# Set data directory to the data/datasets folder relative to this notebook's location;
# os.path.realpath("") resolves to the current working directory
data_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.realpath(""))), "data", "datasets"
)
def padding(img, expected_size):
    """Pad a PIL image with borders so it becomes a square of side expected_size."""
    desired_size = expected_size
    delta_width = desired_size - img.size[0]
    delta_height = desired_size - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    # (left, top, right, bottom) borders; any odd remainder goes on the right/bottom
    pads = (
        pad_width,
        pad_height,
        delta_width - pad_width,
        delta_height - pad_height,
    )
    return ImageOps.expand(img, pads)
def resize_with_padding(img, expected_size):
    """Shrink img to fit within expected_size (width, height), then pad to exactly that size.

    Note: PIL's thumbnail() only ever shrinks, so images already smaller than
    expected_size are padded without being enlarged.
    """
    img.thumbnail((expected_size[0], expected_size[1]))
    delta_width = expected_size[0] - img.size[0]
    delta_height = expected_size[1] - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    pads = (
        pad_width,
        pad_height,
        delta_width - pad_width,
        delta_height - pad_height,
    )
    return ImageOps.expand(img, pads)
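As a quick sanity check on these helpers, here is a hypothetical example (the sizes are purely illustrative): a 300 x 150 gray image padded into a 300 x 300 square, with the original content centered and black borders added above and below.

# Hypothetical example; sizes are illustrative
example = Image.new("RGB", (300, 150), color=(128, 128, 128))
square = resize_with_padding(example, (300, 300))
assert square.size == (300, 300)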
def align_face_68pts(img, img_land, box_enlarge, img_size=112):
    """Rotate, scale, and translate a face image so the eyes are level and centered.

    Args:
        img: image array (as read by cv2.imread)
        img_land: flattened array of 68 (x, y) landmark pairs, length 136
        box_enlarge: relative size of the face within the output crop
        img_size: output crop size in pixels (112 x 112 by default)

    Returns:
        aligned_img: the aligned img_size x img_size face crop
        new_land: the 68 landmarks mapped into the aligned crop's coordinates
    """
    # Eye centers: means of landmarks 36-41 (left eye) and 42-47 (right eye)
    leftEye0 = (
        img_land[2 * 36]
        + img_land[2 * 37]
        + img_land[2 * 38]
        + img_land[2 * 39]
        + img_land[2 * 40]
        + img_land[2 * 41]
    ) / 6.0
    leftEye1 = (
        img_land[2 * 36 + 1]
        + img_land[2 * 37 + 1]
        + img_land[2 * 38 + 1]
        + img_land[2 * 39 + 1]
        + img_land[2 * 40 + 1]
        + img_land[2 * 41 + 1]
    ) / 6.0
    rightEye0 = (
        img_land[2 * 42]
        + img_land[2 * 43]
        + img_land[2 * 44]
        + img_land[2 * 45]
        + img_land[2 * 46]
        + img_land[2 * 47]
    ) / 6.0
    rightEye1 = (
        img_land[2 * 42 + 1]
        + img_land[2 * 43 + 1]
        + img_land[2 * 44 + 1]
        + img_land[2 * 45 + 1]
        + img_land[2 * 46 + 1]
        + img_land[2 * 47 + 1]
    ) / 6.0
    # Rotation that makes the line between the eye centers horizontal
    deltaX = rightEye0 - leftEye0
    deltaY = rightEye1 - leftEye1
    eye_dist = math.sqrt(deltaX * deltaX + deltaY * deltaY)
    sinVal = deltaY / eye_dist
    cosVal = deltaX / eye_dist
    # Note: np.mat was removed in NumPy 2.0; np.asmatrix is the replacement
    mat1 = np.mat([[cosVal, sinVal, 0], [-sinVal, cosVal, 0], [0, 0, 1]])
    # Five anchor points: both eye centers, nose tip (30), and mouth corners (48, 54)
    mat2 = np.mat(
        [
            [leftEye0, leftEye1, 1],
            [rightEye0, rightEye1, 1],
            [img_land[2 * 30], img_land[2 * 30 + 1], 1],
            [img_land[2 * 48], img_land[2 * 48 + 1], 1],
            [img_land[2 * 54], img_land[2 * 54 + 1], 1],
        ]
    )
    mat2 = (mat1 * mat2.T).T
    # Center and half-size of the rotated anchor points' bounding box
    cx = float((max(mat2[:, 0]) + min(mat2[:, 0]))) * 0.5
    cy = float((max(mat2[:, 1]) + min(mat2[:, 1]))) * 0.5
    if float(max(mat2[:, 0]) - min(mat2[:, 0])) > float(
        max(mat2[:, 1]) - min(mat2[:, 1])
    ):
        halfSize = 0.5 * box_enlarge * float((max(mat2[:, 0]) - min(mat2[:, 0])))
    else:
        halfSize = 0.5 * box_enlarge * float((max(mat2[:, 1]) - min(mat2[:, 1])))
    # Scale and translate so the enlarged box fills the img_size x img_size crop
    scale = (img_size - 1) / 2.0 / halfSize
    mat3 = np.mat(
        [
            [scale, 0, scale * (halfSize - cx)],
            [0, scale, scale * (halfSize - cy)],
            [0, 0, 1],
        ]
    )
    mat = mat3 * mat1
    aligned_img = cv2.warpAffine(
        img,
        mat[0:2, :],
        (img_size, img_size),
        flags=cv2.INTER_LINEAR,  # originally passed positionally, which lands on warpAffine's dst argument
        borderValue=(128, 128, 128),
    )
    # Apply the same transform to all 68 landmarks (homogeneous coordinates)
    land_3d = np.ones((int(len(img_land) / 2), 3))
    land_3d[:, 0:2] = np.reshape(np.array(img_land), (int(len(img_land) / 2), 2))
    mat_land_3d = np.mat(land_3d)
    new_land = np.array((mat * mat_land_3d.T).T)
    new_land = np.array(list(zip(new_land[:, 0], new_land[:, 1]))).astype(int)
    return aligned_img, new_land
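To make the transform chain concrete: mat = mat3 * mat1 first rotates the face so the eyes are level (mat1), then scales and translates so the anchor points' bounding box, enlarged by box_enlarge, fills the crop (mat3). A synthetic smoke test with arbitrary values (purely illustrative, not real face data) confirms the output shapes:

# Synthetic smoke test with arbitrary values, just to confirm output shapes
rng = np.random.default_rng(0)
fake_img = rng.integers(0, 255, size=(240, 240, 3), dtype=np.uint8)
fake_land = rng.uniform(60, 180, size=136)  # 68 interleaved (x, y) pairs
aligned, new_land = align_face_68pts(fake_img, fake_land, box_enlarge=2.5)
print(aligned.shape, new_land.shape)  # (112, 112, 3) (68, 2)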
def extract_hog(image, detector):
    """Load an image file, align the face, mask the background, and extract HOG features."""
    im = cv2.imread(image)
    detected_faces = np.array(detector.detect_faces(im)[0])
    if np.any(detected_faces < 0):
        # The face box extends past the image border, so pad the image until it fits.
        # Caution: orig_size is a numpy shape (height, width, channels) while PIL sizes
        # are (width, height); the ordering below is kept as in the original notebook.
        orig_size = np.array(im).shape
        if np.where(detected_faces < 0)[0][0] == 1:
            new_size = (
                orig_size[0],
                int(orig_size[1] + 2 * abs(detected_faces[detected_faces < 0][0])),
            )
        else:
            new_size = (
                int(orig_size[0] + 2 * abs(detected_faces[detected_faces < 0][0])),
                orig_size[1],
            )
        im = resize_with_padding(Image.fromarray(im), new_size)
        im = np.asarray(im)
        detected_faces = np.array(detector.detect_faces(np.array(im))[0])
    detected_faces = detected_faces.astype(int)
    points = detector.detect_landmarks(np.array(im), [detected_faces])[0].astype(int)
    aligned_img, points = align_face_68pts(im, points.flatten(), 2.5)
    # Zero out everything outside the convex hull of the landmarks
    hull = ConvexHull(points)
    mask = grid_points_in_poly(
        shape=np.array(aligned_img).shape,
        verts=list(
            zip(points[hull.vertices][:, 1], points[hull.vertices][:, 0])
        ),  # for some reason verts need to be flipped
    )
    # Extend the mask up to the top of the jaw contour (landmarks 0 and 16) to keep the forehead
    mask[0 : np.min([points[0][1], points[16][1]]), points[0][0] : points[16][0]] = True
    aligned_img[~mask] = 0
    resized_face_np = aligned_img
    fd, hog_image = hog(
        resized_face_np,
        orientations=8,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=True,
        multichannel=True,  # newer scikit-image versions use channel_axis=-1 instead
    )
    return fd, hog_image, points
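With these parameters on a 112 x 112 crop, the HOG vector has a fixed length: 14 x 14 cells of 8 x 8 pixels give 13 x 13 overlapping 2 x 2 blocks, each contributing 2 * 2 * 8 = 32 orientation bins, for 13 * 13 * 32 = 5408 features. A hypothetical single-image call (the file path is illustrative; the detector matches the one created in the next section) looks like:

# Hypothetical usage; the image path is illustrative
detector = Detector(face_model="retinaface", landmark_model="mobilenet")
fd, hog_image, points = extract_hog("some_face.jpg", detector=detector)
print(fd.shape)  # (5408,)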
Example extraction on EmotioNet
You'll need to run this for DISFA Plus and BP4D as well in order to train our AU visualization model.
dataset = "EmotionNet"
labels_filename = "emotionet_labels.csv"
landmarks_filename = "emotionet_landmarks.csv"
detector = Detector(face_model="retinaface", landmark_model="mobilenet")
EmotioNet_images = np.sort(glob.glob(os.path.join(data_dir, dataset, "imgs", "*.jpg")))
labels = pd.read_csv(
    os.path.join(data_dir, dataset, "labels", "EmotioNet_FACS_aws_2020_24600.csv")
)
labels = labels.dropna(axis=0)
# Strip stray quotes and whitespace from the AU column names (e.g. " 'AU1'" -> "AU1")
for col in labels.columns:
    if "AU" in col:
        kwargs = {col.replace("'", "").replace('"', "").replace(" ", ""): labels[[col]]}
        labels = labels.assign(**kwargs)
        labels = labels.drop(columns=col)
# Re-index rows by the image filename extracted from each URL
labels = labels.assign(
    URL=labels.URL.apply(lambda x: x.split("/")[-1].replace("'", ""))
)
labels = labels.set_index("URL")
labels = labels.drop(columns=["URL orig"])
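At this point labels is indexed by image filename with one cleaned column per AU. A quick look confirms the structure before subsetting to the AUs used for training (the output shown is what you'd expect, not captured from a run):

# Sanity check: AU columns should now be cleanly named and indexed by filename
print(labels.filter(like="AU").columns.tolist()[:5])  # e.g. ['AU1', 'AU2', ...]
print(labels.shape)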
aus_to_train = [
    "AU1", "AU2", "AU4", "AU5", "AU6", "AU7", "AU9", "AU10", "AU11", "AU12",
    "AU14", "AU15", "AU17", "AU20", "AU23", "AU24", "AU25", "AU26", "AU28", "AU43",
]
# Write the header rows for the two output files
with open(labels_filename, "w", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",")
    writer.writerow(["URL"] + aus_to_train)

landmark_cols = [f"x_{i}" for i in range(68)] + [f"y_{i}" for i in range(68)]
with open(landmarks_filename, "w", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",")
    writer.writerow(landmark_cols)
for ix, image in enumerate(tqdm(EmotioNet_images)):
    try:
        imageURL = os.path.split(image)[-1]
        label = labels.loc[imageURL][aus_to_train]
        fd, _, points = extract_hog(image, detector=detector)
        with open(labels_filename, "a+", newline="") as csvfile:
            writer = csv.writer(csvfile, delimiter=",")
            writer.writerow([imageURL] + list(label.values))
        with open(landmarks_filename, "a+", newline="") as csvfile:
            writer = csv.writer(csvfile, delimiter=",")
            # points.T.flatten() orders values as all x's then all y's, matching landmark_cols
            writer.writerow(points.T.flatten())
    except Exception as e:
        # Skip images where detection, alignment, or label lookup fails
        print(f"failed {image}: {e}")
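Once the loop finishes, a minimal check (assuming the cells above ran to completion) is to read both files back and confirm they stayed in sync:

# Read the outputs back and confirm one landmark row per label row
labels_df = pd.read_csv(labels_filename)
landmarks_df = pd.read_csv(landmarks_filename)
assert len(labels_df) == len(landmarks_df)
print(f"Extracted features for {len(labels_df)} images")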