-
Notifications
You must be signed in to change notification settings - Fork 11
/
neural_network.py
140 lines (114 loc) · 5.46 KB
/
neural_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
# Credentials to connect to the database.
# Each value may be supplied through an environment variable so that real
# secrets never have to be written into this file; when a variable is not
# set, the original placeholder value is used.
username = os.environ.get("DB_USERNAME", "username")
password = os.environ.get("DB_PASSWORD", "DB_password")
hostname = os.environ.get("DB_HOST", "DB_host")
dbname = os.environ.get("DB_NAME", "DB_name")
# Split the training and validation datasets using the valid_fraction
def get_data_splits(dataframe, valid_fraction=0.2):
    """Split a sliceable sequence into leading (train) and trailing (valid) parts.

    Args:
        dataframe: Object supporting ``len()`` and slicing, e.g. a DataFrame.
        valid_fraction: Share of the rows placed in the validation part.

    Returns:
        tuple: ``(train, valid)`` where ``valid`` holds the last
        ``int(len(dataframe) * valid_fraction)`` rows (never fewer than one
        is requested) and ``train`` holds everything before them.
    """
    # Always reserve at least one row for validation.
    holdout = max(1, int(len(dataframe) * valid_fraction))
    cut = -holdout
    return dataframe[:cut], dataframe[cut:]
def neural_network(nodes, input_length):
    """Build and compile the feed-forward regression network.

    Args:
        nodes: Number of units in the single hidden layer.
        input_length: Number of input features.

    Returns:
        A compiled Keras ``Sequential`` model with one ReLU hidden layer and
        a single ReLU output unit, trained on mean absolute error with Adam.
    """
    hidden_layer = Dense(
        nodes,
        input_dim=input_length,
        kernel_initializer='normal',
        activation='relu',
    )
    output_layer = Dense(1, kernel_initializer='normal', activation='relu')

    network = Sequential([hidden_layer, output_layer])
    network.compile(
        loss='mean_absolute_error',
        optimizer='adam',
        metrics=["mae"],
    )
    return network
# Create training and validation datasets
def create_train_valid_set():
    """Load ``nn_data`` from the database and build train/validation sets.

    The ``week``, ``product_cost`` and ``product_max_bound`` columns are
    removed; the last remaining column is used as the target and all other
    columns as features.  Rows are split product by product with
    ``get_data_splits`` (the trailing rows of each product go to validation),
    and only products whose target standard deviation does not exceed the
    target mean are kept.  ``product_id`` is label-encoded with an encoder
    fitted on the training rows.

    Returns:
        tuple: ``(X_train, X_valid, y_train, y_valid)``.
    """
    # Connect to the database of the e-shop
    engine = create_engine(
        "mysql+mysqlconnector://{user}:{password}@{host}/{dbname}".format(
            user=username,
            password=password,
            host=hostname,
            dbname=dbname,
        )
    )
    nn_data = pd.read_sql_table("nn_data", engine)

    # Drop the columns that are not used by the network in a single pass.
    unused_columns = ["week", "product_cost", "product_max_bound"]
    nn_data = nn_data.loc[:, ~nn_data.columns.isin(unused_columns)]

    # Collect the per-product pieces and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    train_parts = []
    valid_parts = []
    for _, product_rows in nn_data.groupby("product_id", sort=False):
        target = product_rows.iloc[:, -1]
        # std() is NaN for a single-row product, so the comparison is False
        # and such products are left out.
        if target.std() <= target.mean():
            part_train, part_valid = get_data_splits(product_rows)
            train_parts.append(part_train)
            valid_parts.append(part_valid)

    if train_parts:
        train = pd.concat(train_parts, ignore_index=True)
        valid = pd.concat(valid_parts, ignore_index=True)
    else:
        # pd.concat refuses an empty list; keep empty frames with the
        # expected columns instead.
        train = nn_data.iloc[0:0]
        valid = nn_data.iloc[0:0]

    # Copy the feature slices so the product_id assignment below modifies an
    # independent frame rather than a view of train/valid.
    X_train = train.iloc[:, :-1].copy()
    y_train = train.iloc[:, -1]
    X_valid = valid.iloc[:, :-1].copy()
    y_valid = valid.iloc[:, -1]

    product_encoder = LabelEncoder()
    product_encoder.fit(X_train["product_id"])
    X_train["product_id"] = product_encoder.transform(X_train["product_id"])
    X_valid["product_id"] = product_encoder.transform(X_valid["product_id"])
    return X_train, X_valid, y_train, y_valid
# Test the neural network and its performance
def nn_testing():
    """Train the network on the train split and report its performance.

    Builds the train/validation sets with ``create_train_valid_set``, fits a
    23-node network for 100 epochs, plots the training and validation loss
    curves, and prints R2, MAE, the target mean and MAE as a percentage of
    the mean for both sets.  Predictions are rounded to the nearest integer
    before scoring.
    """
    X_train, X_valid, y_train, y_valid = create_train_valid_set()
    model = neural_network(23, X_train.shape[1])
    history = model.fit(
        X_train, y_train,
        epochs=100, batch_size=256,
        # Keras documents validation_data as a tuple (x_val, y_val).
        validation_data=(X_valid, y_valid),
        verbose=0,
    )
    history_dict = history.history

    # Plots model's training cost/loss and model's validation split cost/loss
    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    plt.plot(loss_values, label='training loss')
    plt.plot(val_loss_values, label='val loss')
    plt.legend()
    plt.show()

    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
    y_train_pred = np.round(model.predict(X_train))
    y_valid_pred = np.round(model.predict(X_valid))
    train_mae = mean_absolute_error(y_train, y_train_pred)
    valid_mae = mean_absolute_error(y_valid, y_valid_pred)

    # Calculates and prints r2 score of training and validation data
    print("The R2 score on the Train set is: ", r2_score(y_train, y_train_pred))
    print("The R2 score on the Valid set is: ", r2_score(y_valid, y_valid_pred))

    # Calculates and prints mae of training and validation data
    print("The mae on the Train set is:\t{:0.3f}".format(train_mae))
    print("The mean of the Train set is: ", y_train.mean())
    print("The percentage of mae on Train set is: ", (train_mae / y_train.mean()) * 100)
    print("The mae on the Valid set is:\t{:0.3f}".format(valid_mae))
    print("The mean of the Valid set is: ", y_valid.mean())
    print("The percentage of mae on Valid set is: ", (valid_mae / y_valid.mean()) * 100)
# Final training of the neural network
def nn_final_training():
    """Train the network on the complete ``nn_data`` table and save it.

    The ``week``, ``product_cost`` and ``product_max_bound`` columns are
    removed; the last remaining column is the target and the rest are
    features.  ``product_id`` is label-encoded, a 23-node network is fitted
    for 50 epochs with batch size 16, and the model is written to
    ``final_model.h5``.

    Returns:
        LabelEncoder: The encoder fitted on ``product_id``, needed to encode
        product ids the same way at prediction time.
    """
    # Use the module-level connection settings, exactly as
    # create_train_valid_set does, instead of embedding account details and
    # a host name directly in this function.
    engine = create_engine(
        "mysql+mysqlconnector://{user}:{password}@{host}/{dbname}".format(
            user=username,
            password=password,
            host=hostname,
            dbname=dbname,
        )
    )
    nn_data = pd.read_sql_table("nn_data", engine)

    # Drop the columns that are not used by the network in a single pass.
    unused_columns = ["week", "product_cost", "product_max_bound"]
    nn_data = nn_data.loc[:, ~nn_data.columns.isin(unused_columns)]

    # Copy the feature slice so the product_id assignment below modifies an
    # independent frame rather than a view of nn_data.
    X_train = nn_data.iloc[:, :-1].copy()
    y_train = nn_data.iloc[:, -1]

    product_encoder = LabelEncoder()
    X_train["product_id"] = product_encoder.fit_transform(X_train["product_id"])

    model = neural_network(23, X_train.shape[1])
    model.fit(
        X_train, y_train,
        epochs=50, batch_size=16,
        verbose=0,
    )
    model.save("final_model.h5")
    return product_encoder