-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_hyperpametertuning.py
96 lines (81 loc) · 4.1 KB
/
run_hyperpametertuning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#Imports
from mixture_model import MixtureModel
from mdp import MDP
from tabulate import tabulate
import matplotlib.pyplot as plt
import numpy as np
import os, shutil
# --- Constants -------------------------------------------------------------
PATH = 'dataset'
VALUE_ITERATION_MODEL_PATH = 'saved_models/value-iteration'
RANDOMIZED_MODEL_PATH = 'saved_models/randomized-algo'
SARSA_MODEL_PATH = 'saved_models/sarsa'
TD_MODEL_PATH = 'saved_models/td'
QLEARNING_MODEL_PATH = 'saved_models/Q'

######### Perform Hyper-Parameter Tuning #################
# Candidate values explored by the exhaustive grid search further below.
hyper_parameters_grid_search = {
    'alpha': [0.01, 0.99],          # learning-rate extremes
    'k': [4],                       # number of models / mixture components
    'discountrate': [0.01, 0.99],   # discount-factor (gamma) extremes
    'beta_weight': [0.01, 0.99],    # beta weighting extremes
}
def graph_iteration_vs_reward(model, model_load_path, k=4, m=10, with_comparison=False):
    """Plot iteration-vs-reward curves for every saved model in a directory.

    Each file under ``model_load_path`` is loaded into an MDP and drawn on its
    own subplot; the raw curve is saved as an .npz and the combined figure as a
    .png under 'hyper-parametersgraph/'. The model directory is deleted once
    the graphs have been produced.

    Parameters
    ----------
    model : str
        Label used in the output figure and per-curve .npz filenames.
    model_load_path : str
        Directory containing the saved model files to plot (removed afterwards).
    k, m, with_comparison :
        Unused by the current implementation; kept for interface compatibility.
    """
    print('Plotting for ', model)
    files = os.listdir(model_load_path)
    files.sort()
    num_rows = len(files)
    if num_rows == 0:
        # Nothing to plot; still remove the (empty) checkpoint directory so
        # behavior matches the populated case.
        shutil.rmtree(model_load_path)
        return
    fig, axs = plt.subplots(num_rows, 1, figsize=(8, 2 * num_rows))  # Adjust figsize as needed
    if num_rows == 1:
        # plt.subplots squeezes a 1x1 grid to a bare Axes; normalize so
        # axs[row] indexing works for any number of files.
        axs = [axs]
    title = model_load_path[13:]  # strip the 'saved_models/' prefix
    fig.suptitle(f'Iteration vs Reward for {title}', fontsize=16)
    for row, file in enumerate(files):
        rs = MDP(path='dataset', k=1, save_path=model_load_path)
        rs.load(file)
        # The mixture size is encoded as the character before the file
        # extension (e.g. '...4.npy'). Renamed from 'k' to avoid shadowing
        # the parameter.
        file_k = file[-5]
        y = rs.iteration_vs_reward
        x = list(range(1, len(y) + 1))
        axs[row].plot(x, y)
        axs[row].set_title(title + ',k=' + file_k)
        axs[row].set_xlabel('Iteration')
        axs[row].set_ylabel('Reward')
        # Persist the raw curve so it can be re-plotted without reloading models.
        np.savez('hyper-parametersgraph/' + model + '_' + file_k, iteration=x, reward=y)
    plt.tight_layout()
    plt.savefig('hyper-parametersgraph/' + model + '_iteration_vs_reward.png')
    plt.show()
    # Remove from Saved_Models: checkpoints are no longer needed after plotting.
    shutil.rmtree(model_load_path)
# 2D Grid Search: exhaustive sweep over alpha x discountrate x beta_weight.
# Only the SARSA branch is currently active; the other approaches (value
# iteration, TD, Q-learning, randomized) are kept commented out for reference.
for alpha in hyper_parameters_grid_search['alpha']:
    for discountrate in hyper_parameters_grid_search['discountrate']:
        for beta_weight in hyper_parameters_grid_search['beta_weight']:
            # #Hyper-Parameter Tuning for Value Iteration
            # Use the largest k in the grid; mixture sizes 1..k are trained below.
            k=max(hyper_parameters_grid_search['k'])
            print(f'Hyperparameters Tuning - α={alpha}, β={beta_weight}, γ={discountrate},k={k}')
            # Suffix identifying this hyper-parameter combination in save paths.
            # NOTE(review): alpha, beta_weight and discountrate are embedded
            # only in this suffix and never passed to MDP or the training call
            # below — confirm MDP picks them up elsewhere, otherwise every
            # sweep cell retrains an identical model.
            saved_file_suffix = f'_α={alpha},β={beta_weight},γ={discountrate}'
            # mixture_model = MixtureModel(path='dataset', k=k, alpha=alpha, beta_weight=beta_weight,verbose=True, save_path=VALUE_ITERATION_MODEL_PATH+saved_file_suffix)
            # mixture_model.generate_model(max_iteration=10000)
            # graph_iteration_vs_reward('value_iteration_'+saved_file_suffix,VALUE_ITERATION_MODEL_PATH+saved_file_suffix, k=k, m=10)
            # for i in range(k):
            #     mm = MDP(PATH, k=i+1, save_path=TD_MODEL_PATH+saved_file_suffix)
            #     mm.initialise_mdp()
            #     mm.td_learning_for_optimal_policies(N=100)
            # graph_iteration_vs_reward('td_'+saved_file_suffix,TD_MODEL_PATH+saved_file_suffix, k=k, m=10)
            # for i in range(k):
            #     mm = MDP(PATH, k=i+1, save_path=QLEARNING_MODEL_PATH+saved_file_suffix)
            #     mm.initialise_mdp()
            #     mm.q_learning_for_optimal_policies(N=100)
            # graph_iteration_vs_reward('q_'+saved_file_suffix,QLEARNING_MODEL_PATH+saved_file_suffix, k=k, m=10)
            # Train one SARSA model per mixture size 1..k for this combination.
            for i in range(k):
                mm = MDP(PATH, k=i+1, save_path=SARSA_MODEL_PATH+saved_file_suffix)
                mm.initialise_mdp()
                mm.sarsa_for_optimal_policies(N=100)
            # Plot/persist learning curves for all k models, then delete the
            # checkpoint directory (graph_iteration_vs_reward removes it).
            graph_iteration_vs_reward('sarsa_'+saved_file_suffix,SARSA_MODEL_PATH+saved_file_suffix, k=k, m=10)
            # #Hyper-Paramter Tuning for Rest of Approaches
            # for k in hyper_parameters_grid_search['k']:
            #     print(f'Hyperparameters Tuning - α={alpha}, β={beta_weight}, γ={discountrate},k={k}')
            #     saved_file_suffix = f'_α={alpha},β={beta_weight},γ={discountrate}'
            #     mm = MDP(PATH, k=k, save_path=RANDOMIZED_MODEL_PATH+saved_file_suffix)
            #     mm.initialise_mdp()
            #     mm.randomized_algorithm_for_optimal_policies(N=100)
            #     graph_iteration_vs_reward('randomized_algo_'+saved_file_suffix,RANDOMIZED_MODEL_PATH+saved_file_suffix, k=k, m=10)