Algorithmic trading has grown in popularity over the past few years, with more traders letting automated systems do the work for them. It is a method of executing orders using pre-programmed trading instructions. This project attempts to predict the stock prices of Netflix and Facebook using machine learning algorithms.
Problem
Dataset
Source: Yahoo Finance
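The CSV files used below were exported from Yahoo Finance. As a minimal sketch (assuming the third-party yfinance package, which the original workflow does not use, and an arbitrary illustrative date range), the same history could also be pulled programmatically:
# Optional sketch: pull the same historical data programmatically
# (assumes the third-party yfinance package and an illustrative date range)
import yfinance as yf
nflx = yf.download('NFLX', start='2019-01-01', end='2020-12-31')
nflx.to_csv('project_data/NFLX.csv')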
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
plt.style.use('bmh')
# Load the data
df = pd.read_csv('project_data/NFLX.csv')
df.head()
# Get the number of trading days (rows) and columns
df.shape
# Visualize the close price data
plt.figure(figsize=(16,8))
plt.title('NETFLIX')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.show()
# Get the close price
df = df[['Close']]
df.head()
# Create a variable to predict 'x' days out in the future
future_days = 30
# Create a new column (target) shifted 'x' units/days up
df['Prediction'] = df['Close'].shift(-future_days)
df.tail()
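The shift(-future_days) call moves each closing price up by future_days rows, so the row for day t carries the close of day t + future_days as its target, and the last future_days rows are left as NaN. A small toy illustration (with made-up numbers):
# Toy illustration of the shifted target (made-up values, 2 days ahead)
toy = pd.DataFrame({'Close': [10.0, 11.0, 12.0, 13.0, 14.0]})
toy['Prediction'] = toy['Close'].shift(-2)
print(toy)  # the last 2 rows of 'Prediction' are NaN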
# Create the feature dataset (X), convert it to a numpy array, and remove the last 'x' rows/days
X = np.array(df.drop(['Prediction'], axis = 1))[:-future_days]
X
# Create the target dataset (y), convert it to a numpy array,
# and get all of the target values except the last 'x' rows/days
y = np.array(df['Prediction'])[:-future_days]
y
# Split the data into 75% training and 25% testing
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)
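Note that train_test_split shuffles the rows by default, so past and future days end up mixed across the two sets. A hedged alternative for time-ordered data (not used in the rest of this notebook) is to keep the split chronological:
# Optional sketch: chronological split instead of a random shuffle (not used below)
x_train_ts, x_test_ts, y_train_ts, y_test_ts = train_test_split(X, y, test_size = 0.25, shuffle = False)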
# Get the last 'x' rows of the feature dataset
x_future = df.drop(['Prediction'], axis = 1)[:-future_days]
x_future = x_future.tail(future_days)
x_future = np.array(x_future)
x_future
# Fitting linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(x_train, y_train)
# Fitting SVR with a linear kernel to the dataset
from sklearn.svm import SVR
svrl = SVR(kernel = 'linear')
svrl.fit(x_train, y_train)
# Fitting SVR with a polynomial kernel to the dataset
svrp = SVR(kernel = 'poly')
svrp.fit(x_train, y_train)
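SVR is sensitive to the scale of its inputs, so fitting it on raw prices can hurt the fit. A minimal sketch, assuming scikit-learn's Pipeline and StandardScaler (not part of the original workflow), that standardizes the price feature before the SVR:
# Optional sketch: standardize the price feature before the SVR (not used below)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
svr_scaled = make_pipeline(StandardScaler(), SVR(kernel = 'linear'))
svr_scaled.fit(x_train, y_train)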
# Fitting Decision Tree to the dataset
from sklearn.tree import DecisionTreeRegressor
tree = DecisionTreeRegressor()
tree = tree.fit(x_train, y_train)
# Fitting Random Forest Regression to the dataset
from sklearn.ensemble import RandomForestRegressor
forest = RandomForestRegressor(n_estimators = 20, criterion = 'squared_error', random_state = 0)  # 'mse' was renamed to 'squared_error' in newer scikit-learn
forest.fit(x_train, y_train)
# Fitting XGBoost Regression to the dataset
from xgboost import XGBRegressor
xgb = XGBRegressor()
xgb.fit(x_train, y_train)
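With all six models fitted, a quick way to compare them is their R² score on the held-out test set; the loop below is an added illustration using the standard .score method shared by the scikit-learn regressors and XGBRegressor:
# Sketch: compare the fitted models on the held-out test set (R^2, higher is better)
models = {'Linear Regression': lr, 'SVR (linear)': svrl, 'SVR (poly)': svrp,
          'Decision Tree': tree, 'Random Forest': forest, 'XGBoost': xgb}
for name, model in models.items():
    print(f'{name}: R^2 = {model.score(x_test, y_test):.3f}')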
# Show the model linear regression prediction
lr_prediction = lr.predict(x_future)
print('lr_prediction: ',lr_prediction)
# Show the SVR (linear kernel) model prediction
svrl_prediction = svrl.predict(x_future)
print('svrl_prediction: ', svrl_prediction)
# Show the model SVR Poly prediction
svrp_prediction = svrp.predict(x_future)
print('svrp_prediction: ', svrp_prediction)
# Show the model tree prediction
tree_prediction = tree.predict(x_future)
print('tree_prediction: ', tree_prediction)
# Show the model Random Forest prediction
forest_prediction = forest.predict(x_future)
print('forest_prediction: ', forest_prediction)
# Show the XGBoost prediction
xgb_prediction = xgb.predict(x_future)
print('xgb_prediction: ', xgb_prediction)
print()
# Visualize the linear regression predictions
Predictions = lr_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('Linear Regression Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
# Visualize the SVR (linear kernel) predictions
Predictions = svrl_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('SVR (Linear Kernel) Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
# Visualize the SVR (polynomial kernel) predictions
Predictions = svrp_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('SVR (Polynomial Kernel) Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
# Visualize the decision tree predictions
Predictions = tree_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('Decision Tree Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
# Visualize the random forest predictions
Predictions = forest_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('Random Forest Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
# Visualize the XGBoost predictions
Predictions = xgb_prediction
valid = df[X.shape[0]:].copy()
valid['Prediction'] = Predictions
plt.figure(figsize = (16,8))
plt.title('XGBoost Model')
plt.xlabel('Days')
plt.ylabel('Close Price (USD)')
plt.plot(df['Close'])
plt.plot(valid[['Close', 'Prediction']])
plt.legend(['Orig', 'Val', 'Pred'])
plt.show()
It can be observed that the Linear Regression and Support Vector Machine predictions are very far from the actual prices. Meanwhile, the Decision Tree, Random Forest, and XGBoost models somewhat failed to predict the first and last few days, but over the middle days their predictions come close to the actual prices. Nevertheless, this can be further improved by tuning the hyperparameters and by trying different approaches such as reinforcement learning and artificial neural networks.
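As a hedged sketch of the hyperparameter tuning mentioned above (the grid values are illustrative assumptions, not recommended settings), GridSearchCV could be used to search over the random forest's parameters:
# Sketch: hyperparameter tuning for the random forest over a small, illustrative grid
from sklearn.model_selection import GridSearchCV
param_grid = {'n_estimators': [20, 100, 200], 'max_depth': [None, 5, 10]}
grid = GridSearchCV(RandomForestRegressor(random_state = 0), param_grid, cv = 5,
                    scoring = 'neg_mean_absolute_error')
grid.fit(x_train, y_train)
print('Best parameters:', grid.best_params_)
The same workflow is now applied to Facebook's stock, this time using the day of the month as the single feature.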
# Load the data
df = pd.read_csv('project_data/FB.csv')
df
# Get the number of rows and columns
df.shape
# Get and print the last row of data
actual_price = df.tail(1)
actual_price
# Prepare the data for training the models
# Get all the data except for the last row
df = df.head(len(df)-1)
# Print the new dataset
df
# Create empty lists to store the independent and dependent data
days = list()
adj_close_prices = list()
# Get the date and adjusted close price
df_days = df.loc[:, 'Date']
df_adj_close = df.loc[:, 'Adj Close']
# Create the independent dataset (the day of the month, assuming a YYYY-MM-DD date format)
for day in df_days:
    days.append( [int(day.split('-')[2])] )
# Create the dependent dataset
for adj_close_price in df_adj_close:
    adj_close_prices.append( float(adj_close_price) )
# Print the days and the adjusted close prices
print(days)
print(adj_close_prices)
# Create models
from sklearn.svm import SVR
# Create and train an SVR model using a linear kernel
lin_svr = SVR(kernel = 'linear', C = 1000.0)
lin_svr.fit(days, adj_close_prices)
# Create and train an SVR model using a polynomial kernel
poly_svr = SVR(kernel = 'poly', degree = 2, C = 1000.0)
poly_svr.fit(days, adj_close_prices)
# Create and train an SVR model using an RBF kernel
rbf_svr = SVR(kernel = 'rbf', gamma = 0.15, C = 1000.0)
rbf_svr.fit(days, adj_close_prices)
# Fitting Decision Tree to the dataset
from sklearn.tree import DecisionTreeRegressor
tree = DecisionTreeRegressor()
tree = tree.fit(days, adj_close_prices)
# Fitting Random Forest Regression to the dataset
from sklearn.ensemble import RandomForestRegressor
forest = RandomForestRegressor(n_estimators = 20, criterion = 'squared_error', random_state = 0)  # 'mse' was renamed to 'squared_error' in newer scikit-learn
forest.fit(days, adj_close_prices)
# Plot the model on the graph to see which has the best fit on the original data
plt.figure(figsize=(16,8))
plt.scatter(days, adj_close_prices, color = 'red', label = 'data')
plt.plot(days, rbf_svr.predict(days), color = 'green', label = 'RBF Model')
plt.plot(days, poly_svr.predict(days), color = 'orange', label = 'Polynomial Model')
plt.plot(days, lin_svr.predict(days), color = 'blue', label = 'Linear Model')
plt.legend()
plt.show()
# Plot the model on the graph to see which has the best fit on the original data
plt.figure(figsize=(16,8))
plt.scatter(days, adj_close_prices, color = 'red', label = 'data')
plt.plot(days, tree.predict(days), color = 'green', label = 'Decision Tree Model')
plt.plot(days, forest.predict(days), color = 'orange', label = 'Random Forest Model')
plt.legend()
plt.show()
# Show the predicted price for the given day
day = [[30]]
print('The Linear SVR predicted:', lin_svr.predict(day))
print('The Polynomial SVR predicted:', poly_svr.predict(day))
print('The RBF SVR predicted:', rbf_svr.predict(day))
print('The Decision Tree predicted:', tree.predict(day))
print('The Random Forest predicted:', forest.predict(day))
# Print the actual price of the stock on day 30
print('The actual price:', actual_price['Adj Close'].values[0])
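As an added illustration, the comparison can be made explicit by computing each model's absolute error against the actual price:
# Sketch: absolute error of each model's day-30 prediction against the actual price
actual = actual_price['Adj Close'].values[0]
for name, model in [('Linear SVR', lin_svr), ('Polynomial SVR', poly_svr), ('RBF SVR', rbf_svr),
                    ('Decision Tree', tree), ('Random Forest', forest)]:
    print(f'{name}: absolute error = {abs(model.predict(day)[0] - actual):.2f}')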
The Decision Tree and Random Forest models come closest to the actual price. Machine learning can serve as a rough guide to how much a stock might go up or down, but it cannot guarantee high accuracy and precision. There are many factors to consider when it comes to predicting stock market prices.