-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataload.py
207 lines (160 loc) · 6.4 KB
/
dataload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""Methods for loading shape data"""
import pandas
import os
import my
def load_bwid(params, drop_1_and_6b=True):
    """Load and return big_waveform_info_df.

    params : dict
        Must contain 'unit_db_dir', the directory holding the pickled
        'big_waveform_info_df'.
    drop_1_and_6b : bool
        If True, rows whose 'layer' is '1' or '6b' are dropped, and
        index levels left unused by the drop are removed.

    Returns: DataFrame
        big_waveform_info_df
    """
    # Read the pickled waveform info
    bwid = pandas.read_pickle(
        os.path.join(params['unit_db_dir'], 'big_waveform_info_df'))

    # Optionally exclude layers 1 and 6b
    if drop_1_and_6b:
        keep_mask = ~bwid['layer'].isin(['1', '6b'])
        bwid = bwid[keep_mask].copy()

        # Drop index entries that no longer appear in any row
        bwid.index = bwid.index.remove_unused_levels()

    # Error check: no missing values anywhere
    assert not bwid.isnull().any().any()

    return bwid
def load_session_metadata(params):
    """Load metadata about sessions, tasks, and mice.

    params : dict
        Must contain 'pipeline_input_dir', where the pickled
        'session_df' lives.

    Returns: tuple
        session_df, task2mouse, mouse2task
    """
    # Read the session metadata table
    session_df = pandas.read_pickle(
        os.path.join(params['pipeline_input_dir'], 'session_df'))

    # For each task, the unique mice that ran it
    task2mouse = session_df.groupby('task')['mouse'].unique()

    # For each mouse, its task (one row per distinct task/mouse pair)
    unique_pairs = session_df[['task', 'mouse']].drop_duplicates()
    mouse2task = unique_pairs.set_index('mouse')['task']

    return session_df, task2mouse, mouse2task
def load_big_tm(params, dataset='no_opto', mouse2task=None):
    """Load big_tm, the big trial matrix, and optionally filter it.

    params : dict
        Parameters from the json file. Uses 'patterns_dir' and, when
        `dataset` is not None, 'logreg_dir'.
    dataset : string or None
        If a string, only trials listed in that dataset's 'labels'
        file are kept in the result.
        If None, the unfiltered big_tm is returned.
    mouse2task : Series or None
        If a Series (from load_session_metadata), mouse and task levels
        are inserted into big_tm's index.
        If None, the index is left alone.

    Returns: DataFrame
        big_tm
    """
    # Read the full trial matrix
    big_tm = pandas.read_pickle(
        os.path.join(params['patterns_dir'], 'big_tm'))

    # Optionally keep only the trials belonging to `dataset`
    if dataset is not None:
        labels_path = os.path.join(
            params['logreg_dir'], 'datasets', dataset, 'labels')
        included_trials = pandas.read_pickle(labels_path).index

        # Filter and drop index entries no longer used
        big_tm = big_tm.loc[included_trials]
        big_tm.index = big_tm.index.remove_unused_levels()

    # Optionally add mouse and task as index levels
    if mouse2task is not None:
        big_tm = my.misc.insert_mouse_and_task_levels(big_tm, mouse2task)

    return big_tm
def load_data_from_patterns(params, filename, dataset='no_opto',
    mouse2task=None):
    """Common loader function from the patterns directory.

    filename : string
        These are the valid options:
            big_tm
            big_C2_tip_whisk_cycles
            big_cycle_features
            big_touching_df
            big_tip_pos
            big_grasp_df
        These are unsupported, because they aren't indexed the same:
            big_ccs_df
            kappa_parameterized
            peri_contact_kappa
    params : parameters from json file
    dataset : string or None
        If a string, only the trials of that dataset (from its 'labels'
        file) are included in the result.
        If None, the data is returned unfiltered.
    mouse2task : Series or None
        If a Series (from load_session_metadata), mouse and task levels
        are added to the result's index.
        If None, the index is left alone.

    Returns: DataFrame
        The requested data.
    """
    # Build the path within the patterns directory
    full_filename = os.path.join(params['patterns_dir'], filename)

    # big_tip_pos is stored as HDF5; everything else is pickled
    if filename == 'big_tip_pos':
        res = pandas.read_hdf(full_filename)
    else:
        res = pandas.read_pickle(full_filename)

    # Optionally keep only the trials belonging to `dataset`
    if dataset is not None:
        labels_path = os.path.join(
            params['logreg_dir'], 'datasets', dataset, 'labels')
        included_trials = pandas.read_pickle(labels_path).index

        # Filter and drop index entries no longer used
        res = my.misc.slice_df_by_some_levels(res, included_trials)
        res.index = res.index.remove_unused_levels()

    # Optionally add mouse and task as index levels
    if mouse2task is not None:
        res = my.misc.insert_mouse_and_task_levels(res, mouse2task)

    return res
def load_data_from_logreg(params, filename, dataset='no_opto', mouse2task=None):
    """Load data from logreg directory

    filename : string
        These are the valid options:
            unobliviated_unaggregated_features
            unobliviated_unaggregated_features_with_bin
            obliviated_aggregated_features
            obliviated_unaggregated_features_with_bin
        These are unsupported:
            BINS
    params : parameters from json file
    dataset : string or None
        If string, loads corresponding dataset, and includes only those
        trials in the result.
        If None, returns without filtering.
        If filename == 'obliviated_aggregated_features' and dataset is not None,
        then the pre-sliced version is loaded from the dataset directory.
    mouse2task : Series or None
        If Series (from load_session_metadata), then adds mouse and task
        levels to index.
        If None, does nothing.

    Returns: DataFrame
        The requested data.
    """
    # Load, depending on filename
    # BUGFIX: this previously compared against the misspelled
    # 'oblivated_aggregated_features', so the documented special case
    # below could never trigger
    if filename == 'obliviated_aggregated_features' and dataset is not None:
        # Special case: this was already sliced and dumped in the dataset dir
        full_filename = os.path.join(
            params['logreg_dir'], 'datasets', dataset, 'features')
        res = pandas.read_pickle(full_filename)
    else:
        # Generic case: load directly from the logreg dir
        full_filename = os.path.join(params['logreg_dir'], filename)
        res = pandas.read_pickle(full_filename)

    # Slice out the trials of this dataset (no_opto)
    # NOTE(review): this also runs on the pre-sliced special case above;
    # presumably re-slicing already-sliced data is a no-op — confirm
    if dataset is not None:
        # Load the index of included trials
        included_trials = pandas.read_pickle(os.path.join(
            params['logreg_dir'], 'datasets', dataset, 'labels')
            ).index

        # Apply mask and drop index entries no longer used
        res = my.misc.slice_df_by_some_levels(res, included_trials)
        res.index = res.index.remove_unused_levels()

    # Insert mouse and task levels
    if mouse2task is not None:
        res = my.misc.insert_mouse_and_task_levels(res, mouse2task)

    return res