-
Notifications
You must be signed in to change notification settings - Fork 4
/
ssb_api_helper.py
139 lines (101 loc) · 4.02 KB
/
ssb_api_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from pyjstat import pyjstat
import requests
import json
import pandas as pd
import time
def ssb_parse_url(table_num):
    """Build the URL of a StatBank table in the SSB api (internal function)

    Args:
        table_num (str): table number of StatBank to acquire; it is converted
            with str() before being appended to the base address

    Returns:
        str: URL address of the table
    """
    base_address = 'http://data.ssb.no/api/v0/en/table/'
    return f"{base_address}{table_num!s}"
def ssb_get_var_info(table_num):
    """Get the available variables of a StatBank table

    Sends a GET request to the table URL built by <ssb_parse_url> and reads
    the "variables" entry of the JSON metadata that comes back.

    Args:
        table_num (str): table number of the Statbank to inquire

    Returns:
        dict: {variable_code: list of values} to be used in the
            <ssb_parse_query> function

    Raises:
        requests.HTTPError: if the api answers with an error status code
        requests.Timeout: if the api does not answer within the timeout
    """
    metadata_url = ssb_parse_url(table_num)
    # Use the same 5 second timeout as the POST calls in <ssb_get_table>;
    # without one a stalled connection would block forever.
    response = requests.get(url=metadata_url, timeout=5)
    # Fail with the HTTP status instead of an opaque JSON/KeyError later on.
    response.raise_for_status()
    metadata = json.loads(response.text)
    return {var["code"]: var["values"] for var in metadata['variables']}
def ssb_parse_query(variables):
    """Build the query payload to send to the SSB api

    Args:
        variables (dict): {variable_code: list of values} to get from the
            table

    Returns:
        dict: payload holding one "item" selection per variable code (in the
            iteration order of <variables>) and "json-stat2" as the requested
            response format

    Note:
        One should edit the results from <ssb_get_var_info> function that
        populates what variable codes and values are available from the table
    """
    selections = []
    for code, values in variables.items():
        # The list of values is referenced as-is, not copied.
        item_filter = {"filter": "item", "values": values}
        selections.append({"code": code, "selection": item_filter})
    return {"query": selections, "response": {"format": "json-stat2"}}
def ssb_get_table(table_num, n_step = 1, vars_cell = 1, wait_time = 10):
    """Get data from SSB api

    The values of the last variable in the query are split into <n_step>
    consecutive blocks and one POST call is made per block; the resulting
    dataframes are concatenated in call order.

    Args:
        table_num (str): table number of the Statbank to inquire
        n_step (int): (default: 1) Number of separate calls to make to SSB.
            It is capped at the number of values of the last variable so
            that no call is made with an empty selection.
        vars_cell (dict): {variable_code: list of values} to get; if it is
            not a dict (the default), all variables are fetched with
            <ssb_get_var_info>
        wait_time (float): (default: 10 sec) minimum time in seconds between
            the start of one call and the start of the next

    Return:
        pandas.dataframe: dataframe of the table

    Raises:
        ValueError: if n_step is smaller than 1 or vars_cell is an empty dict
        requests.HTTPError: if the api answers with an error status code

    Note:
        Statbank puts a limit on the number of cells one can download in one
        API call. Hence, increase n_step if the API return error code 403.

    See also:
        - ssb_rotate_table
    """
    if n_step < 1:
        raise ValueError('n_step must be at least 1, got ' + str(n_step))
    if not isinstance(vars_cell, dict):
        vars_cell = ssb_get_var_info(table_num)
    if not vars_cell:
        raise ValueError('vars_cell must contain at least one variable')

    post_url = ssb_parse_url(table_num)
    payload = ssb_parse_query(vars_cell)

    # Only the selection of the last variable is split across calls.
    last_selection = payload["query"][-1]["selection"]
    all_values = vars_cell[payload["query"][-1]["code"]]

    # More steps than values would give a block size of 0 and therefore
    # calls with nothing selected; cap the number of steps instead.
    n_step = min(n_step, max(len(all_values), 1))
    block = len(all_values) // n_step
    chunks = [all_values[block * step:(step + 1) * block]
              for step in range(n_step - 1)]
    # The last block also takes the remainder of the integer division.
    chunks.append(all_values[(n_step - 1) * block:])

    frames = []
    for step, chunk in enumerate(chunks):
        print('Block ' + str(step + 1) + ' of ' + str(n_step))
        last_selection["values"] = chunk
        start_time = time.time()
        response = requests.post(post_url, json = payload, timeout=5)
        print(response)
        # Surface HTTP errors (e.g. 403 when too many cells are requested)
        # instead of failing while parsing the error body.
        response.raise_for_status()
        dataset = pyjstat.Dataset.read(response.text)
        frames.append(dataset.write('dataframe'))
        if step < len(chunks) - 1:
            # Pause between calls, counting the time the call itself took.
            process_time = time.time() - start_time
            if process_time < wait_time:
                print("Pause before making the next call")
                time.sleep(wait_time - process_time)
    return pd.concat(frames)
def ssb_rotate_table(df, ind='year', val='value'):
    """Pivot the dataframe so that the time column is used as the index

    Every column other than <ind> and <val> is spread out into the columns
    of the pivoted table.

    Args:
        df (pandas.dataframe): dataframe (e.g. from <ssb_get_table> function)
        ind (str): string of column name denoting time
        val (str): string of column name denoting values

    Returns:
        dataframe: pivotted dataframe
    """
    excluded = (ind, val)
    spread_columns = [label for label in df.columns if label not in excluded]
    return df.pivot_table(index=ind, values=val, columns=spread_columns)