Neural style transfer is an optimization technique that takes two images, a content image and a style reference image (such as an artwork by a famous painter), and blends them together so that the output image looks like the content image, but “painted” in the style of the style reference image.
Deep neural networks have already surpassed human-level performance in tasks such as object recognition and detection. However, until recently, deep networks lagged far behind in tasks like generating artistic artefacts with high perceptual quality. Creating better-quality art using machine learning techniques is imperative for reaching human-like capabilities, and it opens up a new spectrum of possibilities. With the advancement of computer hardware and the proliferation of deep learning, deep learning is now being used to create art. In this project, an AI will attempt to generate art based on two reference images.
Dataset:
# Import libraries
import tensorflow as tf
import numpy as np
from tensorflow.keras import backend as K
from matplotlib import pyplot as plt
from PIL import Image
%matplotlib inline
# Set Parameters
lr = 7.0           # learning rate handed to the Adam optimizer
size = 224         # images are resized to size x size before entering VGG19
iterations = 800   # number of optimisation steps
style_wt = 1e5     # multiplier of the style cost in the total cost
content_wt = 1.0   # multiplier of the content cost in the total cost
# Specify content and style
# NOTE: the script loads 'content.jpg' and 'style.jpg' by literal name further
# down, so these constants are kept in agreement with the files actually used
# (the style path previously pointed at "style.png").
content_image_path = "content.jpg"
style_image_path = "style.jpg"
# Report which TensorFlow release is running
print("TensorFlow version:", tf.__version__)

# One weight per entry of the style layer list; style_cost() multiplies each
# layer's cost by the weight at the same index
style_layer_wts = [1.0, 0.8, 0.1, 0.1, 0.2]

# Transfer learning: ImageNet-trained VGG19 without its classifier head,
# built for size x size RGB inputs
model = tf.keras.applications.VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(size, size, 3),
)

# The network is only a fixed feature extractor, so freeze its parameters
model.trainable = False

# Print the layer listing
model.summary()
# Image processing function
def preprocess_image(image_path):
    """Load an image file and turn it into a VGG19-ready batch of one.

    The image is resized to ``size`` x ``size`` (module-level setting),
    converted to an array, run through the VGG19 ``preprocess_input``
    routine, and given a leading axis of length one.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The preprocessed image array with an extra leading axis.
    """
    keras_image = tf.keras.preprocessing.image
    loaded = keras_image.load_img(image_path, target_size=(size, size))
    pixels = keras_image.img_to_array(loaded)
    vgg_ready = tf.keras.applications.vgg19.preprocess_input(pixels)
    # Add the leading batch axis
    return np.expand_dims(vgg_ready, axis=0)
# Image deprocessing function
def deprocess(x):
    """Turn a VGG19-preprocessed image back into a displayable uint8 image.

    Adds the offsets 103.939, 116.779 and 123.68 to channels 0, 1 and 2 of
    the last axis, reverses the channel order, and clips the values into
    the 0..255 range. These offsets undo the mean-centring applied by the
    VGG19 ``preprocess_input`` routine (see the Keras documentation).

    Args:
        x: Array-like image with at least three channels on its last axis.

    Returns:
        A new ``uint8`` NumPy array of the same shape as ``x``. The input
        array itself is left unmodified.
    """
    # Work on a copy: shifting the caller's array in place would corrupt it,
    # and deprocessing the same array a second time would shift it twice.
    img = np.array(x, copy=True)
    if not np.issubdtype(img.dtype, np.floating):
        # Integer arrays cannot take the fractional offsets in place.
        img = img.astype(np.float32)
    for channel, offset in enumerate((103.939, 116.779, 123.68)):
        img[..., channel] += offset
    # Reverse the channel order on the last axis
    img = img[..., ::-1]
    return np.clip(img, 0, 255).astype('uint8')
# Set display function
def display_image(image):
    """Show a preprocessed image with matplotlib, with axis ticks removed.

    Args:
        image: Preprocessed image array. When it has four dimensions, only
            the first entry along the leading axis is displayed.
    """
    # Drop the leading axis of a 4-D input
    single = image[0, :, :, :] if len(image.shape) == 4 else image
    rgb = deprocess(single)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(rgb)
    plt.show()
# Preview the inputs: style image first, then content image
for _preview_path in ('style.jpg', 'content.jpg'):
    display_image(preprocess_image(_preview_path))
# Name of the VGG19 layer whose activations represent image content
content_layer = 'block4_conv2'
# Sub-model mapping the VGG19 input to the content layer's output
content_model = tf.keras.Model(
    inputs=model.input,
    outputs=model.get_layer(name=content_layer).output,
)
# Names of the VGG19 layers whose activations represent image style
style_layers = [
    'block1_conv1',
    'block1_conv2',
    'block2_conv1',
    'block2_conv2',
    'block3_conv1',
]
# One sub-model per style layer, each mapping the VGG19 input to that
# layer's output (same order as style_layers)
style_models = [
    tf.keras.Model(
        inputs=model.input,
        outputs=model.get_layer(name=layer_name).output,
    )
    for layer_name in style_layers
]
# Content cost
def content_cost(content_img, generated_img):
    """Return the mean squared difference of content-layer activations.

    Both images are passed through ``content_model`` and the element-wise
    squared difference of the two outputs is averaged.

    Args:
        content_img: Preprocessed content image batch.
        generated_img: Image batch currently being optimised.

    Returns:
        A scalar tensor holding the content cost.
    """
    target_features = content_model(content_img)
    generated_features = content_model(generated_img)
    difference = target_features - generated_features
    return tf.reduce_mean(tf.square(difference))
# Gram matrix
def gram_matrix(M):
    """Return the Gram matrix of a feature tensor, averaged over positions.

    ``M`` is flattened to two dimensions, keeping its last axis as columns.
    The result is ``transpose(flat) @ flat`` divided by the number of rows
    of the flattened tensor.

    Args:
        M: Feature tensor whose last axis indexes channels.

    Returns:
        A square tensor with one row and one column per channel.
    """
    channels = tf.shape(M)[-1]
    flat = tf.reshape(M, shape=(-1, channels))
    row_count = tf.cast(tf.shape(flat)[0], dtype=tf.float32)
    products = tf.matmul(tf.transpose(flat), flat)
    return products / row_count
# Style cost
def style_cost(style_img, generated_img):
    """Return the weighted style cost summed over all style layers.

    For every model in ``style_models`` the Gram matrices of the style and
    generated activations are compared by mean squared difference, scaled
    by the matching entry of ``style_layer_wts``. The sum is divided by
    ``size * size * len(style_models)``.

    Args:
        style_img: Preprocessed style image batch.
        generated_img: Image batch currently being optimised.

    Returns:
        A scalar tensor holding the style cost.
    """
    def layer_cost(index, extractor):
        # Weighted Gram-matrix mismatch for a single style layer
        style_gram = gram_matrix(extractor(style_img))
        generated_gram = gram_matrix(extractor(generated_img))
        mismatch = tf.reduce_mean(tf.square(style_gram - generated_gram))
        return style_layer_wts[index] * mismatch

    weighted_sum = sum(
        layer_cost(index, extractor)
        for index, extractor in enumerate(style_models)
    )
    return weighted_sum / (size * size * len(style_models))
# Prepare the two reference images for VGG19
content_image_preprocessed = preprocess_image('content.jpg')
style_image_preprocessed = preprocess_image('style.jpg')
# The image being optimised starts out as the preprocessed content image
generated_image = tf.Variable(
    initial_value=content_image_preprocessed,
    dtype=tf.float32,
)
# History collected during optimisation: image snapshots and total costs
generated_images = []
costs = []
# Adam performs the updates on the generated image
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# Initiate style transfer
for i in range(iterations):
    # Record the cost computation so it can be differentiated with respect
    # to the generated image
    with tf.GradientTape() as tape:
        J_content = content_cost(content_img=content_image_preprocessed, generated_img=generated_image)
        J_style = style_cost(style_img=style_image_preprocessed, generated_img=generated_image)
        J_total = content_wt * J_content + style_wt * J_style
    gradients = tape.gradient(J_total, generated_image)
    optimizer.apply_gradients([(gradients, generated_image)])
    costs.append(J_total.numpy())
    # Show results every 100 iterations, and always after the final one so
    # that generated_images[-1] is the fully optimised image rather than a
    # snapshot taken up to 99 steps earlier
    if i % 100 == 0 or i == iterations - 1:
        # Fetch a fresh array for each use so that nothing done while
        # displaying can touch the stored snapshot
        display_image(generated_image.numpy())
        generated_images.append(generated_image.numpy())
        # Convert the scalar tensors to floats so the log shows plain numbers
        print("Iteration:{}/{}, Total Cost:{}, Style Cost: {}, Content Cost: {}".format(
            i + 1, iterations, float(J_total), float(J_style), float(J_content)))
# Plot the total cost recorded at every iteration
plt.plot(
    range(iterations),
    costs,
)
plt.ylabel("Total Cost")
plt.xlabel("Iterations")
plt.show()
# Deprocess image
# Pass a copy of the last snapshot: deprocess() shifts the array it is given,
# so handing it the stored snapshot directly would alter generated_images and
# make a second run of this step produce a different picture
image = Image.fromarray(deprocess(generated_images[-1][0].copy()))
# Show output image
plt.imshow(image)
plt.xticks([])
plt.yticks([])
plt.tight_layout()
# Write the rendered figure to disk
plt.savefig('out.png')
The model training took a long time. It is recommended to run this kind of project on a high-specification computer, ideally one with a powerful GPU, or to use Google Colab for its free TPU. Overall, the resulting image is nice. It can be concluded that AI can also generate art. :)