In this project, machine learning and deep learning models will be trained to predict the percentage of Silica Concentrate in the iron ore concentrate on a per-minute basis. In practice, this could be used in the mining industry to obtain the Silica Concentrate percentage much faster than traditional methods.
Freeport-McMoRan is one of the largest mining companies in the U.S. It has used a custom-built AI model to boost output at its mining facilities.
The 4th Industrial Revolution: How Mining Companies Are Using AI, Machine Learning And Robots:
Problem: Predict the % Silica Concentrate in the iron ore concentrate from the other process measurements in the dataset.
Dataset: mining_data.csv
Source: Kaggle Competition
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
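# The zipfile import suggests the raw Kaggle download arrives as a zip archive.
# A minimal sketch for unpacking it before reading the CSV; the archive name
# 'mining_data.zip' is an assumption and may differ from the actual download.
with zipfile.ZipFile('mining_data.zip') as zip_ref:
    zip_ref.extractall('.')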
# Import data
mining_df = pd.read_csv('mining_data.csv')
# Check data
mining_df.head()
# Check data statistics
mining_df.describe()
# Check data types
mining_df.dtypes
# Check for missing values
mining_df.isnull().mean()
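# If the check above reports missing values, one simple sketch for handling them;
# dropping rows is an assumption here, mean imputation is an equally simple alternative.
mining_df = mining_df.dropna()
# Alternative: mining_df = mining_df.fillna(mining_df.mean(numeric_only=True))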
# Plot histograms of all features
mining_df.hist(bins = 30, figsize = (20,20), color = 'b')
plt.show()
# Import library
from heatmap import corrplot
# Show feature correlations using a heatmap
plt.figure(figsize=(12, 8))
corrplot(mining_df.corr(), size_scale=300)
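# corrplot appears to come from the third-party 'heatmapz' package (imported as 'heatmap');
# if it is not available, seaborn's built-in heatmap gives a comparable view of the same correlations.
plt.figure(figsize=(12, 8))
sns.heatmap(mining_df.corr(), cmap='coolwarm', annot=False)
plt.show()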
# Create scatterplot of % Silica Concentrate vs % Iron Concentrate
sns.scatterplot(x=mining_df['% Silica Concentrate'], y=mining_df['% Iron Concentrate'])
plt.show()
# Create scatterplot of % Iron Feed vs % Silica Feed
sns.scatterplot(x=mining_df['% Iron Feed'], y=mining_df['% Silica Feed'])
plt.show()
# Define the independent variables (features)
df_iron = mining_df.drop(columns = '% Silica Concentrate')
# Define the dependent variable (target)
df_iron_target = mining_df['% Silica Concentrate']
# Check Independent variables shape
df_iron.shape
# Check dependent variables shape
df_iron_target.shape
# Transform into array
df_iron = np.array(df_iron)
# Transform into array
df_iron_target = np.array(df_iron_target)
# Reshaping the array
df_iron_target = df_iron_target.reshape(-1,1)
# Check dimension
df_iron_target.shape
# Import library
from sklearn.preprocessing import StandardScaler
# Scale the independent variables
scaler_x = StandardScaler()
X = scaler_x.fit_transform(df_iron)
# Scale the dependent variables
scaler_y = StandardScaler()
y = scaler_y.fit_transform(df_iron_target)
# Import library
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Check train dataset shape
X_train.shape, y_train.shape
# Check test dataset shape
X_test.shape, y_test.shape
# Import libraries
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Train the model
LinearRegression_model = LinearRegression()
LinearRegression_model.fit(X_train, y_train)
# Evaluate the Linear Regression model (R^2 score on the test set)
accuracy_LinearRegression = LinearRegression_model.score(X_test, y_test)
accuracy_LinearRegression
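# A single train/test split can be optimistic or pessimistic by chance;
# a sketch of 5-fold cross-validation on the training set gives a more stable R^2 estimate
# (the fold count of 5 is an arbitrary assumption).
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(LinearRegression(), X_train, y_train.ravel(), cv=5, scoring='r2')
print('Cross-validated R2: {:.3f} +/- {:.3f}'.format(cv_scores.mean(), cv_scores.std()))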
# Import library
from sklearn.tree import DecisionTreeRegressor
# Train the model
DecisionTree_model = DecisionTreeRegressor()
DecisionTree_model.fit(X_train, y_train)
# Evaluate the Decision Tree model (R^2 score on the test set)
accuracy_DecisionTree = DecisionTree_model.score(X_test, y_test)
accuracy_DecisionTree
# Import library
from xgboost import XGBRegressor
# Train the model
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
# Evaluate the XGBoost model (R^2 score on the test set)
xgb.score(X_test, y_test)
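# Quick side-by-side of the test-set R^2 scores computed above (a sketch reusing the variables
# already defined; the XGBoost score is recomputed here since it was not stored in a variable).
accuracy_XGBoost = xgb.score(X_test, y_test)
print('Linear Regression R2 :', accuracy_LinearRegression)
print('Decision Tree R2     :', accuracy_DecisionTree)
print('XGBoost R2           :', accuracy_XGBoost)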
# Import library
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam
# Create model architecture
optimizer = Adam(learning_rate=0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-07, amsgrad = False)
ANN_model = keras.Sequential()
ANN_model.add(Dense(250, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
ANN_model.add(Dense(500,activation = 'relu'))
ANN_model.add(Dropout(0.1))
ANN_model.add(Dense(1000, activation = 'relu'))
ANN_model.add(Dropout(0.1))
ANN_model.add(Dense(1000, activation = 'relu'))
ANN_model.add(Dropout(0.1))
ANN_model.add(Dense(500, activation = 'relu'))
ANN_model.add(Dropout(0.1))
ANN_model.add(Dense(250, activation = 'relu'))
ANN_model.add(Dropout(0.1))
ANN_model.add(Dense(1, activation = 'linear'))
# Compile the model with the Adam optimizer configured above
ANN_model.compile(loss = 'mse', optimizer = optimizer)
# Check model summary
ANN_model.summary()
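# Five epochs may underfit; when training for longer, an EarlyStopping callback can halt training
# once validation loss stops improving and keep the best weights (patience of 3 is an assumption).
# To use it, pass callbacks=[early_stop] and a larger epochs value to fit() below.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)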
# Initiate model training
history = ANN_model.fit(X_train, y_train, epochs = 5, validation_split = 0.2)
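# The history object records per-epoch losses; a quick sketch of the learning curves.
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()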
# Evaluate the ANN model on the test set (evaluate returns the MSE loss on the scaled target);
# since y was standardized to unit variance, 1 - MSE serves as a rough stand-in for the R^2 score
result = ANN_model.evaluate(X_test, y_test)
accuracy_ANN = 1 - result
print("Accuracy : {}".format(accuracy_ANN))
# From the above results, the Decision Tree model outperforms the other models.
# Plot True values and Model Predictions
y_predict = DecisionTree_model.predict(X_test)
plt.plot(y_predict, y_test, '^', color = 'b')
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
plt.show()
# Plot True values and Model Predictions on their original scale
y_predict_orig = scaler_y.inverse_transform(y_predict.reshape(-1, 1))
y_test_orig = scaler_y.inverse_transform(y_test)
plt.plot(y_test_orig, y_predict_orig, "^", color = 'r')
plt.xlabel('True Values')
plt.ylabel('Model Predictions')
plt.show()
# Import libraries
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Check evaluation metrics
k = X_test.shape[1]   # number of features
n = len(X_test)       # number of test samples
RMSE = float(format(np.sqrt(mean_squared_error(y_test_orig, y_predict_orig)),'.3f'))
MSE = mean_squared_error(y_test_orig, y_predict_orig)
MAE = mean_absolute_error(y_test_orig, y_predict_orig)
r2 = r2_score(y_test_orig, y_predict_orig)
adj_r2 = 1-(1-r2)*(n-1)/(n-k-1)
print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', r2, '\nAdjusted R2 =', adj_r2)
The Decision Tree model obtained roughly 98% accuracy (R^2 score) in prediction and outperformed the ANN model. Although the ANN was trained for only five epochs and could be further improved by adding more layers, it would still take considerably more time to train and consume far more computing power. Given the good evaluation metrics, the Decision Tree model can be deployed to production to save process engineers a significant amount of time in their workload.
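Since deployment is the end goal, the trained model and both fitted scalers need to be persisted together so incoming measurements can be scaled the same way at inference time. A minimal sketch using joblib (the file names are assumptions):
import joblib
# Persist the Decision Tree model and the fitted scalers (file names are assumptions)
joblib.dump(DecisionTree_model, 'decision_tree_silica.joblib')
joblib.dump(scaler_x, 'scaler_x.joblib')
joblib.dump(scaler_y, 'scaler_y.joblib')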