-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
168 lines (139 loc) · 8.35 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# importing required libraries
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.preprocessing import LabelEncoder
from prediction import predict
# Load the training and test sets from the working directory.
df = pd.read_csv('train.csv')
test1 = pd.read_csv('test.csv')
# The building id and the three geo-level ids are removed from both the train
# and the test frame (author's note: they do not change the prediction).
id_columns = ['building_id', 'geo_level_1_id', 'geo_level_2_id', 'geo_level_3_id']
df = df.drop(columns=id_columns)
test1 = test1.drop(columns=id_columns)
# Columns cast to 'object' in the training frame. The cast makes the
# dtype-based split that follows (select_dtypes) put them in the object group,
# which is the group that gets label-encoded.
int_columns = [
    'count_floors_pre_eq',
    'has_superstructure_adobe_mud',
    'has_superstructure_mud_mortar_stone',
    'has_superstructure_stone_flag',
    'has_superstructure_cement_mortar_stone',
    'has_superstructure_mud_mortar_brick',
    'has_superstructure_cement_mortar_brick',
    'has_superstructure_timber',
    'has_superstructure_bamboo',
    'has_superstructure_rc_non_engineered',
    'has_superstructure_rc_engineered',
    'has_superstructure_other',
    'count_families',
    'has_secondary_use',
    'has_secondary_use_agriculture',
    'has_secondary_use_hotel',
    'has_secondary_use_rental',
    'has_secondary_use_institution',
    'has_secondary_use_school',
    'has_secondary_use_industry',
    'has_secondary_use_health_post',
    'has_secondary_use_gov_office',
    'has_secondary_use_use_police',
    'has_secondary_use_other',
    'damage_grade',
]
df[int_columns] = df[int_columns].astype('object')
# The test frame gets the same cast for the same columns, except the target
# 'damage_grade', which is not listed for the test file.
int_columns_tt = [column for column in int_columns if column != 'damage_grade']
test1[int_columns_tt] = test1[int_columns_tt].astype('object')
# Split the training frame by dtype: numeric columns vs. object columns.
numcols = df.select_dtypes(np.number)
objcols = df.select_dtypes('object')
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training data only.
mm = MinMaxScaler()
numcols_mm = mm.fit_transform(numcols)
numcols_mm = pd.DataFrame(numcols_mm, columns=numcols.columns)
# Same dtype split for the test frame.
numcols1 = test1.select_dtypes(np.number)
objcols1 = test1.select_dtypes('object')
# Scale the test data with the scaler fitted on the training data. Calling
# fit_transform here would re-fit on the test set, so train and test would be
# scaled with different min/max values.
# NOTE(review): this requires the test frame to have the same numeric columns
# as the training frame - confirm against the CSV files.
numcols_mm_tt = mm.transform(numcols1)
numcols_mm_tt = pd.DataFrame(numcols_mm_tt, columns=numcols1.columns)
# Label-encode every object column, fitting a fresh encoder per column.
from sklearn.preprocessing import LabelEncoder
lb = LabelEncoder()  # kept from the original; the encoding below does not use it
objcols = objcols.apply(lambda column: LabelEncoder().fit_transform(column))
# NOTE(review): the test columns are encoded with encoders fitted on the test
# data itself, so a category missing from one file would shift the codes
# relative to the other file.
objcols1 = objcols1.apply(lambda column: LabelEncoder().fit_transform(column))
# Put the numeric and encoded columns back together.
# The training frame uses the unscaled numeric columns (numcols), while the
# test frame uses the min-max scaled ones (numcols_mm_tt).
train_parts = [numcols, objcols]
test_parts = [numcols_mm_tt, objcols1]
combinedf = pd.concat(train_parts, axis=1)
test1 = pd.concat(test_parts, axis=1)
# Split into the dependent variable (y) and the independent variables (X).
X = combinedf.drop('damage_grade', axis=1)
y = combinedf.damage_grade
from imblearn.over_sampling import SMOTE
# Resample the training data with SMOTE (imblearn over-sampling).
sm = SMOTE()
X, y = sm.fit_resample(X, y)
# Decision tree
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(max_depth=50, criterion='entropy', splitter='random')
treemodel = tree.fit(X, y)
# The original also scored the model on the training data (result discarded)
# and predicted the whole training set into an unused variable. Both were
# full-dataset passes with no effect, repeated every time the script runs, so
# they are dropped.
# Save the fitted model to disk; predict() loads it back from this file.
import joblib
joblib.dump(tree, 'tree_model_smote.sav')
# -------------
# Page setup: browser-tab title, icon and wide layout.
page_settings = {
    'page_title': "Richter's Predictor: Modeling Earthquake Damage",
    'page_icon': "🌎",
    'layout': 'wide',
}
st.set_page_config(**page_settings)
# Title shown at the top of the page
st.title("Earthquake Building Damage Grade Prediction")
# Heading above the input fields (rendered on the main page, not in a sidebar)
st.header('Details of the building')
# Collect the input variables.
# No widgets are shown for the building id and geo-level ids: those columns
# are dropped from the data before the model is trained.
# Every yes/no field offers the same two string choices.
flag_options = ['0', '1']
# Seven side-by-side columns with five widgets each.
col1, col2, col3, col4, col5, col6, col7 = st.columns(7)
with col1:
    age = st.number_input('Age of the Building (Years)', min_value=0, max_value=60)
    area_percentage = st.number_input('Area Percentage', min_value=0, max_value=100)
    height_percentage = st.number_input('Height Percentage', min_value=0, max_value=10)
    count_floors_pre_eq = st.number_input('Number of Floors Pre-Earthquake', min_value=0)
    land_surface_condition = st.selectbox('Land Surface Condition', options=['n', 'o', 't'])
with col2:
    foundation_type = st.selectbox('Foundation Type', options=['h', 'i', 'r', 'u', 'w'])
    roof_type = st.selectbox('Roof Type', options=['n', 'q', 'x'])
    ground_floor_type = st.selectbox('Ground Floor Type', options=['f', 'm', 'v', 'x', 'z'])
    other_floor_type = st.selectbox('Other Floor Type', options=['j', 'q', 's', 'x'])
    position = st.selectbox('Position', options=['j', 'o', 's', 't'])
with col3:
    plan_configuration = st.selectbox(
        'Plan Configuration',
        options=['a', 'c', 'd', 'f', 'm', 'n', 'o', 'q', 's', 'u'],
    )
    has_superstructure_adobe_mud = st.selectbox(
        'Has Superstructure Adobe Mud', options=flag_options)
    has_superstructure_mud_mortar_stone = st.selectbox(
        'Has Superstructure Mud Mortar Stone', options=flag_options)
    has_superstructure_stone_flag = st.selectbox(
        'Has Superstructure Stone Flag', options=flag_options)
    has_superstructure_cement_mortar_stone = st.selectbox(
        'Has Superstructure Cement Mortar Stone', options=flag_options)
with col4:
    has_superstructure_mud_mortar_brick = st.selectbox(
        'Has Superstructure Mud Mortar Brick', options=flag_options)
    has_superstructure_cement_mortar_brick = st.selectbox(
        'Has Superstructure Cement Mortar Brick', options=flag_options)
    has_superstructure_timber = st.selectbox(
        'Has Superstructure Timber', options=flag_options)
    has_superstructure_bamboo = st.selectbox(
        'Has Superstructure Bamboo', options=flag_options)
    has_superstructure_rc_non_engineered = st.selectbox(
        'Has Superstructure RC Non-Engineered', options=flag_options)
with col5:
    has_superstructure_rc_engineered = st.selectbox(
        'Has Superstructure RC Engineered', options=flag_options)
    has_superstructure_other = st.selectbox(
        'Has Superstructure Other', options=flag_options)
    legal_ownership_status = st.selectbox(
        'Legal Ownership Status', options=['a', 'r', 'v', 'w'])
    count_families = st.number_input('Number of Families', min_value=0, max_value=10)
    has_secondary_use = st.selectbox('Has Secondary Use', options=flag_options)
with col6:
    has_secondary_use_agriculture = st.selectbox(
        'Has Secondary Use Agriculture', options=flag_options)
    has_secondary_use_hotel = st.selectbox(
        'Has Secondary Use Hotel', options=flag_options)
    has_secondary_use_rental = st.selectbox(
        'Has Secondary Use Rental', options=flag_options)
    has_secondary_use_institution = st.selectbox(
        'Has Secondary Use Institution', options=flag_options)
    has_secondary_use_school = st.selectbox(
        'Has Secondary Use School', options=flag_options)
with col7:
    has_secondary_use_industry = st.selectbox(
        'Has Secondary Use Industry', options=flag_options)
    has_secondary_use_health_post = st.selectbox(
        'Has Secondary Use Health Post', options=flag_options)
    has_secondary_use_gov_office = st.selectbox(
        'Has Secondary Use Gov Office', options=flag_options)
    has_secondary_use_use_police = st.selectbox(
        'Has Secondary Use Police', options=flag_options)
    has_secondary_use_other = st.selectbox(
        'Has Secondary Use Other', options=flag_options)
# -------------------------------------------------------------------------------------------------------------------------------------
import joblib
def predict(data):
    """Return the saved classifier's predictions for *data*.

    The model is read from 'tree_model_smote.sav' on every call and *data* is
    passed straight to its ``predict`` method.

    Note: this definition replaces the ``predict`` name imported from the
    ``prediction`` module at the top of the file.
    """
    model = joblib.load('tree_model_smote.sav')
    predictions = model.predict(data)
    return predictions
#-------------------------------------------------------------------------
# Prediction
# The original handler called predict() on the whole training matrix X and
# read the first result, so the answer never depended on the widgets. The
# handler now builds a single feature row from the widget values.
if st.button('Predict'):
    # Widget values keyed by training column name.
    raw_inputs = {
        'age': age,
        'area_percentage': area_percentage,
        'height_percentage': height_percentage,
        'count_floors_pre_eq': count_floors_pre_eq,
        'land_surface_condition': land_surface_condition,
        'foundation_type': foundation_type,
        'roof_type': roof_type,
        'ground_floor_type': ground_floor_type,
        'other_floor_type': other_floor_type,
        'position': position,
        'plan_configuration': plan_configuration,
        'legal_ownership_status': legal_ownership_status,
        'count_families': count_families,
    }
    # The yes/no selectboxes return the strings '0'/'1'; convert them to ints.
    # NOTE(review): assumes the flag columns hold integers in train.csv - confirm.
    flag_choices = {
        'has_superstructure_adobe_mud': has_superstructure_adobe_mud,
        'has_superstructure_mud_mortar_stone': has_superstructure_mud_mortar_stone,
        'has_superstructure_stone_flag': has_superstructure_stone_flag,
        'has_superstructure_cement_mortar_stone': has_superstructure_cement_mortar_stone,
        'has_superstructure_mud_mortar_brick': has_superstructure_mud_mortar_brick,
        'has_superstructure_cement_mortar_brick': has_superstructure_cement_mortar_brick,
        'has_superstructure_timber': has_superstructure_timber,
        'has_superstructure_bamboo': has_superstructure_bamboo,
        'has_superstructure_rc_non_engineered': has_superstructure_rc_non_engineered,
        'has_superstructure_rc_engineered': has_superstructure_rc_engineered,
        'has_superstructure_other': has_superstructure_other,
        'has_secondary_use': has_secondary_use,
        'has_secondary_use_agriculture': has_secondary_use_agriculture,
        'has_secondary_use_hotel': has_secondary_use_hotel,
        'has_secondary_use_rental': has_secondary_use_rental,
        'has_secondary_use_institution': has_secondary_use_institution,
        'has_secondary_use_school': has_secondary_use_school,
        'has_secondary_use_industry': has_secondary_use_industry,
        'has_secondary_use_health_post': has_secondary_use_health_post,
        'has_secondary_use_gov_office': has_secondary_use_gov_office,
        'has_secondary_use_use_police': has_secondary_use_use_police,
        'has_secondary_use_other': has_secondary_use_other,
    }
    raw_inputs.update({name: int(choice) for name, choice in flag_choices.items()})

    # Same columns, in the same order, as the matrix the model was fitted on.
    feature_columns = combinedf.columns.drop('damage_grade')
    encoded_row = {}
    problems = []
    for column in feature_columns:
        if column not in raw_inputs:
            problems.append(f"{column} (no input field)")
            continue
        value = raw_inputs[column]
        if column in objcols.columns:
            # This column was label-encoded for training. LabelEncoder numbers
            # the sorted distinct values, so the code for a value is its
            # position among the sorted distinct training values in df.
            classes = sorted(df[column].unique())
            if value in classes:
                encoded_row[column] = classes.index(value)
            else:
                problems.append(f"{column}={value!r} (not present in the training data)")
        else:
            # Numeric columns were passed to the model unscaled, so the raw
            # widget value is used as-is.
            encoded_row[column] = value

    if problems:
        st.error("Cannot predict for: " + ", ".join(problems))
    else:
        features = pd.DataFrame([encoded_row], columns=feature_columns)
        prediction = predict(features)[0]
        # The target is the label-encoded damage_grade; the three messages
        # below are the original mapping of its codes.
        if prediction == 0:
            st.markdown("The building damage condition is Low")
        elif prediction == 1:
            st.markdown("The building damage condition is Medium")
        else:
            st.markdown("The building damage condition is High")
        # NOTE(review): the original indentation was lost; the balloons are
        # assumed to belong to the button handler.
        st.balloons()