-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute.py
279 lines (207 loc) · 9.25 KB
/
compute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""
hold all of the computations that are preformed on the collected data
"""
from typing import List, Tuple, Any
from math import log, sqrt
from matrix import Matrix
def add_values(val1: Tuple[List[Any], List[int]], val2: Tuple[List[Any], List[int]]) -> \
Tuple[List[str], List[int]]:
"""
Calculate the combined average of the inputted values for the days that have data available.
Preconditions:
- len(val1[1]) > 0
- len(val2[1]) > 0
>>> a = (['2019010101', '2019010103', '2019010104', '2019010105'], [35, 34, 36, 34])
>>> b = (['2019010101', '2019010102', '2019010103', '2019010105'], [4, 5, 5, 4])
>>> add_values(a, b)
(['2019010101', '2019010103', '2019010105'], [39, 39, 38])
"""
# resulting dates and summed values
date_list = []
sum_list = []
for i in range(0, len(val1[0])):
for j in range(0, len(val2[0])):
# matching the dates
if val1[0][i] == val2[0][j]:
# calculating summed value
date_list.append(val1[0][i])
sum_list.append(val1[1][i] + val2[1][j])
return (date_list, sum_list)
def calculate_average_collection(points: tuple) -> tuple:
"""Return the two averages of a collection of numbers passed in as a tuple.
points is a tuple of two lists: ([x_1, x_2 ...], [y_1, y_2, ...])
This function returns the averages of the two lists in the form of a tuple.
You may ASSUME that:
- both lists are not empty
>>> calculate_average_collection(([1, 2, 3], [1, 2, 3]))
(2.0, 2.0)
>>> calculate_average_collection(([10.0, 20.0, 30.0], [3.0, 6.0, 12.0]))
(20.0, 7.0)
"""
return (sum(points[0]) / len(points[0]), sum(points[1]) / len(points[1]))
def calculate_r_squared(points: Tuple[List[float], List[float]], a: float, b: float) -> float:
"""Return the R squared value when the given points are modelled as the line y = a + bx.
points is a list of pairs of numbers: [(x_1, y_1), (x_2, y_2), ...]
Assume that:
- points is not empty and contains tuples
- each element of points is a tuple containing two floats
"""
y_bar = calculate_average_collection(points)[1]
s_tot = sum([(y - y_bar) ** 2 for y in points[1]])
s_res = sum([(points[1][i] - (a + b * points[0][i])) ** 2 for i in range(len(points[0]))])
r_squared = 1 - (s_res / s_tot)
return r_squared
def simple_linear_regression(points: Tuple[List[float], List[float]]) -> tuple:
"""Perform a linear regression on the given points.
points is a list of pairs of floats: [(x_1, y_1), (x_2, y_2), ...]
This function returns a pair of floats (a, b) such that the line
y = a + bx is the approximation of this data.
Further reading: https://en.wikipedia.org/wiki/Simple_linear_regression
You may ASSUME that:
- len(points) > 0
- each element of points is a tuple of two floats
>>> simple_linear_regression(([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]))
(0.0, 1.0)
>>> simple_linear_regression(([10.0, 20.0, 30.0], [3.0, 6.0, 12.0]))
(-2.0, 0.45)
Hint: you might want to define a separate function that calculates the average
of a collection of numbers.
"""
x_bar, y_bar = calculate_average_collection(points)
x_bracket = [x - x_bar for x in points[0]]
y_bracket = [y - y_bar for y in points[1]]
b_top = sum(x_bracket[i] * y_bracket[i] for i in range(len(x_bracket)))
b_bottom = sum(x_b ** 2 for x_b in x_bracket)
b = b_top / b_bottom
a = y_bar - b * x_bar
return (a, b)
def polynomial_regression(deg: int, x_val: List[float], y_val: List[float]) -> List[float]:
"""
Calculate the coefficients of a polynomial function with degree deg
that best fits the input data
the output is ordered in increasing degree
Preconditions:
- deg > 0
- len(x_val) > 0
- len(x_val) == len(y_val)
>>> polynomial_regression(2, [10, 20, 30, 40, 50, 60, 0.354, 0.38, 0.405, 0.43],\
[0.1245, 0.185, 0.2285, 0.265, 0.297, 0.3285, 70, 80, 90, 100])
[80.44857677401095, -4.871378463853153, 0.06236668350454819]
>>> polynomial_regression(1, [1985, 1990, 1995, 2000, 2005, 2010, 2015],\
[116576, 130482, 145562, 155328, 158401, 159440, 167543])
[-3047466.8571428573, 1597.5428571428572]
"""
matrix_r = Matrix(deg + 1, deg + 1)
matrix_l = [[]]
# fills the coefficient matrix
for row in range(0, (deg + 1)):
for column in range(0, deg + 1):
matrix_l[row].append(sum([pow(x, row + column) for x in x_val]))
if row != deg:
matrix_l.append([])
constant_vec = []
x_temp = 0
# fills the column vector of constants
for row in range(0, (deg + 1)):
for i in range(0, len(x_val)):
x_temp += pow(x_val[i], row) * y_val[i]
constant_vec.append(x_temp)
x_temp = 0
matrix_r.set_matrix(matrix_l)
return matrix_r.solve(constant_vec)
def exponential_regression(x_val: List[float], y_val: List[float]) -> List[float]:
"""
Calculates the coefficients of an exponential function that best fits
the given input
output is a list in the form [a, b], where y = b * a^x
Preconditions:
- len(x_val) > 0
- len(x_val) == len(y_val)
>>> exponential_regression([1985, 1990, 1995, 2000, 2005, 2010, 2015] ,\
[56593071, 56719240, 56844303, 56942108, 57969485, 59277417, 61336387])
[1.0024980484455097, 394317.0737279723]
"""
valids = [log(y) for y in y_val if y != 0]
place_avg = sum(valids) / len(valids)
log_values = []
for value in y_val:
if value != 0:
log_values.append(log(value, 10))
else:
log_values.append(place_avg)
coefficients = polynomial_regression(1, x_val, log_values)
return [pow(10, coefficients[1]), pow(10, coefficients[0])]
def standard_deviation(points: Tuple[List[float], List[float]]) -> float:
"""Return the standard deviation of a sample of data passed in as a tuple of floats.
points is a tuple of two lists: ([x_1, x_2 ...], [y_1, y_2, ...])
This function returns the standard deviation of the sample of data as a float.
You may ASSUME that:
- both lists are not empty
>>> points = ([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [10.0, 12.0, 23.0, 23.0, 16.0, 23.0, 21.0, 16.0])
>>> standard_deviation(points)
5.237229365663817
"""
x_coords, y_coords = points[0], points[1]
# Finding the mean of the y values in the data set:
mean = sum(y_coords) / len(x_coords)
# Calculating the sum of the square of every y value minus the mean y value
total = 0
for y in y_coords:
total = total + ((y - mean) ** 2)
# Dividing the total by the number of x values minus 1 and then taking the square root
standard_dev = sqrt(total / (len(x_coords) - 1))
return standard_dev
def relative_standard_deviation(points: Tuple[List[float], List[float]]) -> float:
"""Return the relative standard deviation of a sample of data passed in as a tuple of floats.
points is a tuple of two lists: ([x_1, x_2 ...], [y_1, y_2, ...])
This function returns the standard deviation of the sample of data as a float.
You may ASSUME that:
- both lists are not empty
>>> points = ([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [10.0, 12.0, 23.0, 23.0, 16.0, 23.0, 21.0, 16.0])
>>> relative_standard_deviation(points)
29.095718698132316
"""
x_coords, y_coords = points[0], points[1]
# Finding the mean of the y values in the sample data set:
mean = sum(y_coords) / len(x_coords)
# Calculating the standard deviation of a sample size:
standard_dev = standard_deviation(points)
# Determining the percentage of the standard deviation by the mean
rel_standard_dev = (standard_dev / mean) * 100
return rel_standard_dev
def gen_points_matching_date(data1: Tuple[List[Any], List[float]],
data2: Tuple[List[Any], List[float]]) -> Tuple[List[int], List[int]]:
"""
This function returns plots for O3 vs NO2 using matching dates
NOTE: data1 y-values become the x-values, and data2 y-values become the y-values
NOTE: x-values are sorted in ascending order for the plotting software
Preconditions:
- len(data1) > 0 and len(data2) > 0
- len(data1[0]) == len(data1[1]) and len(data2[0]) == len(data2[1])
>>> gen_points_matching_date((['a', 'b', 'd'], [1, 2, 3]), (['a', 'b', 'c'], [4, 5, 6]))
([1, 2], [4, 5])
"""
x_cor, y_cor = [], []
for i in range(len(data1[0])):
date = data1[0][i]
for j in range(len(data2[0])):
other_date = data2[0][j]
if date == other_date:
x_cor.append(int(data1[1][i]))
y_cor.append(int(data2[1][j]))
zippy = zip(x_cor, y_cor)
x_cor, y_cor = [list(a) for a in zip(*sorted(zippy))]
return (x_cor, y_cor)
if __name__ == '__main__':
import python_ta
python_ta.check_all(config={
'extra-imports': ['matrix', 'math', 'python_ta.contracts'],
'allowed-io': [],
'max-line-length': 100,
'disable': ['R1705', 'C0200'],
})
import python_ta.contracts
python_ta.contracts.DEBUG_CONTRACTS = False
python_ta.contracts.check_all_contracts()
import doctest
doctest.testmod(verbose=True)