Facial key-point detection serves as a basis for Emotional AI applications such as detecting customer emotional responses to ads and driver monitoring systems. In this project, we build a deep learning model based on Convolutional Neural Networks and residual blocks to predict facial key points.
Affectiva is one of the leading players in Emotional AI; its software detects human emotions, complex cognitive states, and behaviours. (https://www.affectiva.com/)
Problem: Predict the (x, y) coordinates of 15 facial key points on 96x96 grayscale face images.
Dataset:
Source: Kaggle Competition
# Import the necessary packages
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from IPython.display import display
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# load the data
facialpoints_df = pd.read_csv('KeyFacialPoints.csv')
# Check the data
facialpoints_df.head()
# Check data info
facialpoints_df.info()
# Let's take a look at a sample image
facialpoints_df['Image'][1]
# Since the image values are given as a space-separated string, we need to split the string,
# convert the values to a numeric array, and reshape the resulting 1D array into a 2D array of shape (96, 96).
# (np.fromstring with a text separator is deprecated, so we split the string and use np.array instead.)
facialpoints_df['Image'] = facialpoints_df['Image'].apply(lambda x: np.array(x.split(), dtype=int).reshape(96, 96))
# Let's obtain the shape of the reshaped image
facialpoints_df['Image'][1].shape
# Let's confirm that there are no null values
facialpoints_df.isnull().sum()
# Plot a random image from the dataset along with facial keypoints.
i = np.random.randint(1, len(facialpoints_df))
plt.imshow(facialpoints_df['Image'][i],cmap='gray')
plt.show()
# The (x, y) coordinates of the 15 key points are plotted on top of the image.
# Below is a for loop running from index = 1 to 29 with a step of 2:
# in the first iteration j is 1, followed by 3, and so on.
# Since x-coordinates are in even columns (0, 2, 4, ...) and y-coordinates are in odd columns (1, 3, 5, ...),
# we access their values using .loc, which gets the coordinate values for the image based on the column it refers to.
# In the first iteration, df.loc[i][j-1] is df.loc[i][0], referring to the value in the 1st column (x-coordinate) of the image in row 'i'.
plt.figure()
plt.imshow(facialpoints_df['Image'][i], cmap='gray')
for j in range(1, 31, 2):
    plt.plot(facialpoints_df.loc[i][j-1], facialpoints_df.loc[i][j], 'rx')
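# The keypoint-overlay loop above recurs several times below; a small convenience helper
# (our own sketch, not part of the original notebook) names the pattern. The explicit loops
# are kept below for readability.
def plot_keypoints(image, points, ax=None):
    """Overlay the 15 (x, y) key points of one row on its 96x96 image."""
    ax = ax or plt.gca()
    ax.imshow(image, cmap='gray')
    for j in range(1, 31, 2):  # even columns hold x, odd columns hold y
        ax.plot(points[j-1], points[j], 'rx')
# Example usage: plot_keypoints(facialpoints_df['Image'][i], facialpoints_df.loc[i])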
# Import library
import random
# Let's view more images in a grid format
fig = plt.figure(figsize=(20, 20))
for i in range(16):
    ax = fig.add_subplot(4, 4, i + 1)
    plt.imshow(facialpoints_df['Image'][i], cmap='gray')
    for j in range(1, 31, 2):
        plt.plot(facialpoints_df.loc[i][j-1], facialpoints_df.loc[i][j], 'rx')
# Let's view a larger grid of randomly sampled images
fig = plt.figure(figsize=(20, 20))
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1)
    # Pick a random image index within range
    img = np.random.randint(1, len(facialpoints_df))
    plt.imshow(facialpoints_df['Image'][img], cmap='gray')
    for j in range(1, 31, 2):
        plt.plot(facialpoints_df.loc[img][j-1], facialpoints_df.loc[img][j], 'rx')
# Import library
import copy
# Create a new copy of the dataframe
facialpoints_df_copy = copy.copy(facialpoints_df)
# obtain the header of the DataFrame (names of columns)
columns = facialpoints_df_copy.columns[:-1]
# Check columns
columns
# Take a look at the pixel values of a sample image and see if they make sense!
facialpoints_df['Image'][0]
# plot the sample image
plt.imshow(facialpoints_df['Image'][0], cmap = 'gray')
plt.show()
# Now Let's flip the image column horizontally
facialpoints_df_copy['Image'] = facialpoints_df_copy['Image'].apply(lambda x: np.flip(x, axis = 1))
# Now take a look at the flipped image and do a sanity check!
# Notice that the values of pixels are now flipped
facialpoints_df_copy['Image'][0]
# Notice that the image is flipped now
plt.imshow(facialpoints_df_copy['Image'][0], cmap = 'gray')
plt.show()
# Since we are flipping the images horizontally, the y-coordinate values stay the same;
# only the x-coordinate values need to change. All we have to do is subtract the initial
# x-coordinate values from the width of the image (96).
for i in range(len(columns)):
    if i % 2 == 0:
        facialpoints_df_copy[columns[i]] = facialpoints_df_copy[columns[i]].apply(lambda x: 96. - float(x))
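# A quick sanity check (our addition, not in the original notebook): after the horizontal
# flip, each x-coordinate should satisfy x_flipped == 96 - x_original.
assert np.isclose(facialpoints_df_copy.loc[0][columns[0]],
                  96. - facialpoints_df.loc[0][columns[0]])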
# View the original image
plt.imshow(facialpoints_df['Image'][0], cmap='gray')
for j in range(1, 31, 2):
    plt.plot(facialpoints_df.loc[0][j-1], facialpoints_df.loc[0][j], 'rx')
# View the horizontally flipped image
plt.imshow(facialpoints_df_copy['Image'][0], cmap='gray')
for j in range(1, 31, 2):
    plt.plot(facialpoints_df_copy.loc[0][j-1], facialpoints_df_copy.loc[0][j], 'rx')
# Stack the original dataframe with the horizontally flipped copy (np.concatenate returns a numpy array)
facialpoints_df_augmented = np.concatenate((facialpoints_df,facialpoints_df_copy))
# Check dimension
facialpoints_df_augmented.shape
# Let's perform another augmentation by randomly increasing image brightness:
# we multiply the pixel values by a random factor between 1 and 2 to brighten the image,
# then clip the resulting values to stay between 0 and 255.
facialpoints_df_copy = copy.copy(facialpoints_df)
facialpoints_df_copy['Image'] = facialpoints_df['Image'].apply(lambda x:np.clip(random.uniform(1, 2) * x, 0.0, 255.0))
facialpoints_df_augmented = np.concatenate((facialpoints_df_augmented, facialpoints_df_copy))
# Check dimension
facialpoints_df_augmented.shape
# Create another copy
facialpoints_df_copy = copy.copy(facialpoints_df)
# Flip the image column vertically (note that axis = 0)
facialpoints_df_copy['Image'] = facialpoints_df_copy['Image'].apply(lambda x: np.flip(x, axis = 0))
# Compare the pixel values of the original and the vertically flipped image
facialpoints_df['Image'][0]
facialpoints_df_copy['Image'][0]
# Since we are flipping the images vertically, the x-coordinate values stay the same;
# only the y-coordinate values need to change. All we have to do is subtract the initial
# y-coordinate values from the height of the image (96).
for i in range(len(columns)):
    if i % 2 == 1:
        facialpoints_df_copy[columns[i]] = facialpoints_df_copy[columns[i]].apply(lambda x: 96. - float(x))
# View the vertically flipped image
plt.imshow(facialpoints_df_copy['Image'][0], cmap='gray')
for j in range(1, 31, 2):
    plt.plot(facialpoints_df_copy.loc[0][j-1], facialpoints_df_copy.loc[0][j], 'rx')
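# Note (our addition): unlike the horizontal flip and brightness augmentations, this
# vertically flipped copy is not concatenated into facialpoints_df_augmented above.
# If desired, it could be appended the same way:
# facialpoints_df_augmented = np.concatenate((facialpoints_df_augmented, facialpoints_df_copy))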
# Obtain the values of the 'Image' column and normalize them
# Note that 'Image' is the 31st column, but since indexing starts from 0 we refer to it as column 30
img = facialpoints_df_augmented[:, 30]
img = img / 255.
# Create an empty array of shape (len(img), 96, 96, 1) to train the model
X = np.empty((len(img), 96, 96, 1))
# Iterate through the normalized images and add the image values to the empty array
# Note that we need to expand the dimensions from (96, 96) to (96, 96, 1)
for i in range(len(img)):
    X[i,] = np.expand_dims(img[i], axis=2)
# Convert the array type to float32
X = np.asarray(X).astype(np.float32)
X.shape
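# As an aside (our addition), the loop above can be replaced by a single vectorized
# expression; we verify here that the two give the same result.
X_vec = np.stack(list(img))[..., np.newaxis].astype(np.float32)
assert np.allclose(X, X_vec)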
# Obtain the values of the key facial point coordinates, which are to be used as targets
y = facialpoints_df_augmented[:,:30]
y = np.asarray(y).astype(np.float32)
y.shape
# Split the data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1)
# Check training data dimensions
X_train.shape, y_train.shape
# Check testing data dimensions
X_test.shape, y_test.shape
# Let's view more images in a grid format
fig = plt.figure(figsize=(20, 20))
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1)
    plt.imshow(X_train[i].reshape(96, 96), cmap='gray')
    for j in range(1, 31, 2):
        plt.plot(y_train[i][j-1], y_train[i][j], 'rx')
def res_block(X, filters, stage):
    # CONVOLUTIONAL BLOCK
    X_copy = X
    f1, f2, f3 = filters

    # Main path
    X = Conv2D(f1, (1,1), strides=(1,1), name='res_'+str(stage)+'_conv_a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = MaxPool2D((2,2))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_conv_a')(X)
    X = Activation('relu')(X)

    X = Conv2D(f2, kernel_size=(3,3), strides=(1,1), padding='same', name='res_'+str(stage)+'_conv_b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_conv_b')(X)
    X = Activation('relu')(X)

    X = Conv2D(f3, kernel_size=(1,1), strides=(1,1), name='res_'+str(stage)+'_conv_c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_conv_c')(X)

    # Short path
    X_copy = Conv2D(f3, kernel_size=(1,1), strides=(1,1), name='res_'+str(stage)+'_conv_copy', kernel_initializer=glorot_uniform(seed=0))(X_copy)
    X_copy = MaxPool2D((2,2))(X_copy)
    X_copy = BatchNormalization(axis=3, name='bn_'+str(stage)+'_conv_copy')(X_copy)

    # Add the main and short paths
    X = Add()([X, X_copy])
    X = Activation('relu')(X)

    # IDENTITY BLOCK 1
    X_copy = X

    # Main path
    X = Conv2D(f1, (1,1), strides=(1,1), name='res_'+str(stage)+'_identity_1_a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_1_a')(X)
    X = Activation('relu')(X)

    X = Conv2D(f2, kernel_size=(3,3), strides=(1,1), padding='same', name='res_'+str(stage)+'_identity_1_b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_1_b')(X)
    X = Activation('relu')(X)

    X = Conv2D(f3, kernel_size=(1,1), strides=(1,1), name='res_'+str(stage)+'_identity_1_c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_1_c')(X)

    # Add both paths together (note that we feed the original input as-is, hence the name "identity")
    X = Add()([X, X_copy])
    X = Activation('relu')(X)

    # IDENTITY BLOCK 2
    X_copy = X

    # Main path
    X = Conv2D(f1, (1,1), strides=(1,1), name='res_'+str(stage)+'_identity_2_a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_2_a')(X)
    X = Activation('relu')(X)

    X = Conv2D(f2, kernel_size=(3,3), strides=(1,1), padding='same', name='res_'+str(stage)+'_identity_2_b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_2_b')(X)
    X = Activation('relu')(X)

    X = Conv2D(f3, kernel_size=(1,1), strides=(1,1), name='res_'+str(stage)+'_identity_2_c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_'+str(stage)+'_identity_2_c')(X)

    # Add both paths together
    X = Add()([X, X_copy])
    X = Activation('relu')(X)

    return X
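# A quick shape check for res_block (our own sketch, not part of the original notebook):
# the MaxPool2D layers halve the spatial dimensions, and the final 1x1 convolutions set
# the channel count to f3.
demo_in = Input((24, 24, 64))                                   # hypothetical input tensor
demo_out = res_block(demo_in, filters=[64, 64, 256], stage='demo')
print(demo_out.shape)                                           # expected: (None, 12, 12, 256)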
input_shape = (96,96,1)
# Input tensor shape
X_input = Input(input_shape)
# Zero-padding
X = ZeroPadding2D((3,3))(X_input)
# Stage #1
X = Conv2D(64, (7,7), strides= (2,2), name = 'conv1', kernel_initializer= glorot_uniform(seed = 0))(X)
X = BatchNormalization(axis =3, name = 'bn_conv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((3,3), strides= (2,2))(X)
# Stage #2
X = res_block(X, filters=[64, 64, 256], stage=2)
# Stage #3
X = res_block(X, filters=[128, 128, 512], stage=3)
# Average Pooling
X = AveragePooling2D((2,2), name='Average_Pooling')(X)
# Final layer
X = Flatten()(X)
X = Dense(4096, activation = 'relu')(X)
X = Dropout(0.2)(X)
X = Dense(2048, activation = 'relu')(X)
X = Dropout(0.1)(X)
X = Dense(30, activation = 'relu')(X)
model = Model( inputs= X_input, outputs = X)
model.summary()
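# plot_model was imported above but never used; it can render the architecture to a file
# (requires the pydot and graphviz packages to be installed):
# plot_model(model, to_file='model.png', show_shapes=True)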
# Pick the optimizer ('learning_rate' replaces the deprecated 'lr' argument)
adam = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
# Compile the model (accuracy is not meaningful for a regression task, so we track mean absolute error instead)
model.compile(loss="mean_squared_error", optimizer=adam, metrics=['mae'])
# Save the best model with least validation loss
checkpointer = ModelCheckpoint(filepath = "weights.hdf5", verbose = 1, save_best_only = True)
history = model.fit(X_train, y_train, batch_size = 256, epochs= 100, validation_split = 0.05, callbacks=[checkpointer])
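# EarlyStopping and ReduceLROnPlateau were imported above but never wired in; a sketch
# (our suggestion, not part of the original training run) of how they could shorten training:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5)
# e.g. model.fit(X_train, y_train, batch_size=256, epochs=100,
#                validation_split=0.05, callbacks=[checkpointer, early_stop, reduce_lr])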
# Save trained model
model_json = model.to_json()
with open('KeyPointDetector.json', 'w') as json_file:
    json_file.write(model_json)
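# Alternatively (our note), Keras can save the architecture and weights together in one file:
# model.save('KeyPointDetector.h5')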
# Training from scratch would take a very long time on a slow laptop,
# so instead we load previously trained model weights.
with open('KeyPointDetector.json', 'r') as json_file:
    json_SavedModel = json_file.read()
model = tf.keras.models.model_from_json(json_SavedModel)
model.load_weights('weights.hdf5')
model.compile(loss="mean_squared_error", optimizer=adam, metrics=['mae'])
# Evaluate the trained model
result = model.evaluate(X_test, y_test)
print("Mean absolute error : {}".format(result[1]))
# Make prediction using the testing dataset
df_predict = model.predict(X_test)
# Import library
from sklearn.metrics import mean_squared_error
from math import sqrt
# Print the rmse loss values
rms = sqrt(mean_squared_error(y_test, df_predict))
print("RMSE value : {}".format(rms))
# Convert the predicted values into a dataframe
df_predict= pd.DataFrame(df_predict, columns = columns)
df_predict.head()
# Plot the test images and their predicted keypoints
fig = plt.figure(figsize=(20, 20))
for i in range(8):
    ax = fig.add_subplot(4, 2, i + 1)
    # Use squeeze to convert the image shape from (96, 96, 1) to (96, 96)
    plt.imshow(X_test[i].squeeze(), cmap='gray')
    for j in range(1, 31, 2):
        plt.plot(df_predict.loc[i][j-1], df_predict.loc[i][j], 'rx')
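# For a visual error check (our addition), overlay the ground-truth key points (green plus
# signs) alongside the predictions (red crosses) on the same test images.
fig = plt.figure(figsize=(20, 20))
for i in range(8):
    ax = fig.add_subplot(4, 2, i + 1)
    plt.imshow(X_test[i].squeeze(), cmap='gray')
    for j in range(1, 31, 2):
        plt.plot(df_predict.loc[i][j-1], df_predict.loc[i][j], 'rx')  # predicted
        plt.plot(y_test[i][j-1], y_test[i][j], 'g+')                  # ground truth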
The model architecture was built properly, but it obtained a root mean squared error of 21.81, probably due to too few training iterations. Some predicted points are not aligned properly on the face, while others fit reasonably well; the image augmentation is a likely reason for the points that do fit. Image augmentation was needed to keep the model from simply memorizing the key-point positions. The model can be improved further by training for more iterations.