-
-
Notifications
You must be signed in to change notification settings - Fork 117
/
interpolate.c
2584 lines (2239 loc) · 94 KB
/
interpolate.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Created by Steinar Midtskogen, Public Domain
// http://voksenlia.net/sqlite3/interpolate.c
#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
/* The "interpolate" virtual table for SQLite3
Written by Steinar Midtskogen <steinar@latinitas.org>
Revision 1: 2012-03-06. Not yet extensively tested or verified.
Revision 2: 2012-04-13. Bug fixes. Optimisations. Filter added.
Support for multiple averages.
Revision 3: 2012-10-11. __min/__max bugfix.
Revision 4: 2012-10-21. Support timestamps in descending order.
Disable max() optimisation assuming too
much. Added first() and last() aggregate
funtions. Fixed missing ORDER BY.
Revision 5: 2012-11-03. Fixed constraint bug.
Revision 6: 2012-11-25. Added rnd() function.
This is public domain as SQLite itself. Copy, change and use as
you wish at your own risk, but if you find bugs, the author would
appreciate to hear about it or even better get a fix.
The module creates a read-only virtual table based on a natural
left join between several tables with a common timestamp column and
a union of all timestamps using the timestamp. That is, several
tables will be combined into one, like natural full outer join if
SQLite had supported it. The table has the following extra
features:
* NULL values are filled in with interpolated values where possible.
* Non-existing rows can be looked up (timestamps not in any table),
and column values will be interpolated if possible.
* Support for calculating the moving average, and max and min
values withing the moving window.
* Support for interpolation and average of angles/bearings.
* Basic filtering capabilities.
* Only INTEGER and REAL columns are supported. Everything else
becomes NULL.
This module was developed to provide a convenient way to access and
process sensor data (like weather data), but it might be useful for
other purposes as well.
To compile: gcc -std=c99 -O3 -g -fPIC -shared interpolate.c -o libsqliteinterpolate.so -lm
To load into SQLite: SELECT load_extension("libsqliteinterpolate.so");
Examples:
Use this module to turn a table like:
timestamp|value
100 |20
150 |
300 |40
into:
timestamp|value
100 |20
150 |25
300 |40
"SELECT value FROM tab WHERE timestamp = 250" will give the value
35.
While all this is possible using regular SQL, it would be somewhat
complex or very complex SQL expressions. Besides, calculating a
moving average with regular SQL would require a lot of summing of
the same values, while this module will try to do this efficiently.
Usage: CREATE VIRTUAL TABLE v USING
interpolate(tab1, [tab2, ..., tabn][, key[, col1, ..., coln]][, period1 ..., periodn][,
filter %][, subsampling /]);
Requirements:
* If a key is not given, all tables must have an INTEGER primary
key with the same name.
* If a key is given, it must be a column having the same name in
all tables. By explicitly naming the key, it is possible to use
tables without an INTEGER primary key, but this will be slower.
* col1, ..., coln are the columns to select for the virtual table.
It's not necessary to include these. If not, "*" will be assumed,
but this can make queries run slower and much more memory hungry.
* The input tables can't depend on a virtual table unless SQLite
3.7.12 or later is used. This is an SQLite design limitation.
* The key is the axis used for interpolation, usually a timestamp.
* The tables must not share other coulmn names.
* The column names in the input tables must not cause aliasing issues
(see below). If this is a problem, use a view to work around it.
* The order of the arguments are not important, except when listing
column names. Then the column to be used as the key must be the
first one listed. Also, if several of the listed columns are
present in all the input tables, the first one will be used as
a key.
Note that it will be much more efficient to create a single virtual
table from several tables than to create a virtual table from every
table and then join the virtual tables.
The optional "period" arguments state both the size for the moving
average window as well as the minimum search range for
interpolation. Note that this is the minimum search range, and
it's not strictly enforced, so the search range may be longer in
reality. If the "period" argument is not given, it will default to
86400, corresponding to 24 hours for a timestamp key in second
resolution. The period and filter arguments can appear anywhere in
the argument list. A postfix can be used which will act as a
multiplier. The possible postfixes are "s", "m", "h", "d", "w",
"M" and "y" which represent the multiplies 1, 60, 3600, 86400,
604800, 2592000 and 31536000 respectively. That is, corresponding
to a second, a minute, an hour, a day, a week, a month (30 days)
and a year (365 days) assuming that the number represents a second.
So, "3600", "1 h", "3600 s" and "60 m" all give the same period.
Note that there must be a space between the number and the postfix.
State a period only once, and not both "1 d" and "24 h".
The optional argument "filter", if non-zero, is used to create
additional columns ending in __avg_filtered, which will give the
moving average, just as __avg (see below), except that a filter
removing the highest and lowest values will be applied before the
averaging. The "filter" argument is a percentage of how many
samples that should be filtered away. A percentage of 0 means no
filtering, and the columns wont be created. If the argument is not
supplied, 0% will be assumed. Note that the percentage must end in
a "%", otherwise the number can be interpreted as a period value.
The "filter" argument might be useful on noisy data.
An integer followed by "/" can be used to apply subsampling on the
input tables. It's implemented as a "GROUP BY" statement involving
the timestamp divided by the given number. This can be useful to
speed up operations on tables with many rows at the loss of some
precision.
One use for this module could be to combine data from columns in
different tables. For instance, to calculate the dew point we need
to know the temperature and relative humidity, but if the
temperature and relative humidity are stored in different tables
and logged at different intervals, interpolation will be needed to
find the dew point at a certain time. Another example: Assume two
tables:
CREATE TABLE t1 ( temp_out REAL, time INTEGER, PRIMARY KEY (time);
CREATE TABLE t2 ( temp_in REAL, time INTEGER, PRIMARY KEY (time);
and a virtual table created by this module:
CREATE TABLE v USING interpolate(t1, t2);
We can now look up an arbitrary time in v even if that time doesn't
exist in either two source tables:
SELECT temp_in - temp_out FROM v WHERE time = 1234567890;
The module will try to interpolate if a row with time = 1234567890
doesn't exist in one or both tables. Interpolation fails if no
values to interpolate from are found within the period. In that
case, the result will be NULL.
The module defines a few new functions, and it will also create
some extra columns.
New functions:
* avg(value, timestamp): Like the regular avg(), but it will
perform a weighted average. It will sum the average of every
adjacent samples multiplied by the difference of their
timestamps. The timestamp would usually be the primary key.
This is very useful to calculate mean values when there are
irregular gaps in the data.
* avg_angle(column[, timestamp[, weight]]): As above, but the
column is treated as an angle in degrees, and there is also an
optional third argument which is an additional weight. This
ensures that the average of 350 and 10 degrees becomes 0, not
180. For instance: avg_angle(wind_dir, unix_time, wind_speed)
would compute the average wind direction which is not only
duration weighted, but also wind speed weighted. The result will
be between 0 and 360.
* first(value, timestamp): Returns the first non-NULL value. Useful
when selecting multiple columns like
SELECT first(col:1, timestamp), ... first(col_n, timestamp)
when some columns have NULL values.
* last(value, timestamp): Like above, but the last non-NULL value
is returned.
* hexagesimal(value[, decimals]): Convert a decimal number into its
hexagesimal equivalence down to second (1/3600) fractions. The
optional second argument controls how many decimal points should
be used for the seconds part.
* rnd(value[, decimals]): As the built in round(), but using a
rounding giving more intuitive results when the number can't be
represented exactly in IEEE floating point.
* havg(value1[, value2[, ..., valuen]]): "Horizontal average".
Scalar function which will return the average of all non-NULL
arguments.
* havg_angle(value1[, value2[, ..., valuen]]): As above, but the
arguments are assumed to be angles in degrees.
New columns have the same name as the original colums with a postfix:
* __avg: This column contains the centered moving average.
* __avg_filtered: This column contains the centered moving average
after a filter has been applied (see above). The column will only
exist if the filter percentage is not 0.
* __angle: Use this column to make the interpolation work on angles
in degrees.
* __angle_avg: As __avg, but the value is assumed to be an angle in
degrees.
* __min: This column contains the lowest value in the moving
average window.
* __max: This column contains the highest value in the moving
average window.
Note that this can cause name aliasing. If one of the source
tables have a column "t" it cannot also have a column "t__avg".
All new colums are HIDDEN, so they wont show up using "*" and must
be accessed (and computed) explicitly.
Important: The postfixes above assume that the default 24 hour
average was used. If the period was stated explicitly, the postfix
will end in _6h if the argument was "6 h" or _81600 if the argument
was "81600", etc. So a virtual table declared "USING
interpolate(tab, 1 w)" will give columns with postfix "__avg_1w",
"__min_1w", etc.
Examples (all assuming a 24 hour moving average window, a column
"temp" a timestamp "ts" as the primary key):
* To get the moving average:
SELECT temp__avg FROM v WHERE ts = strftime("%s", "2012-01-31 18:00:00");
which is equivalent to:
SELECT avg(temp, ts) FROM v
WHERE ts >= strftime("%s", "2012-01-31 06:00:00") AND ts <= strftime("%s", "2012-02-01
06:00:00");
* To get the highest and lowest value in the moving average window:
SELECT temp__max, temp__min FROM v WHERE ts = strftime("%s", "2012-01-31 18:00:00");
* To get the average daily low and high:
SELECT avg(temp__min, ts), avg(temp__max, ts) FROM v WHERE ...;
* To get the lowest high and highest low of any day:
SELECT min(temp__max), max(temp__min) FROM v WHERE ...;
* To get the biggest difference between daily low and high:
SELECT max(temp__max - temp__min) FROM v WHERE ...;
Note on angles:
There are several ways to do interpolation and average of angles.
It can be argued that there is no one single correct way to do
this. To find the average this module will convert all angles into
their sinus and cosinus components, so it becomes a point on the
unit circle, then add everything the regular way, except that the
time weighting is adjusted so that the weight equals the path along
the arc spanned by two angles, rather than along the shortest
paths. This ensures that the average of time weighted angles
remains the same when interpolated angles are inserted in between
(inaccuracy errors might accumulate slightly differently, though).
Still, there might be unexpected results. For instance, SELECT
havg_angle(350, 10, 60) returns 19.3243480943937, not 20 as one
perhaps would think. But, again, any "correct" average of circular
values depends on what your assumptations are.
Limitations and final notes:
Don't mix X and X__angle in the same SQL statement. This
will usually make no sense (either a value is an angle or it's
not), so it shouldn't be a problem. So a statement like SELECT
avg(dir, ts), avg_angle(dir__angle, ts) FROM ... will give
undefined results. The reason is that the two average calculations
might share data state information.
*/
static double myNAN = 0.0 / 0.0;
typedef enum {
COL_BASE = 0,
COL_AVG,
COL_MIN,
COL_MAX,
COL_ANGLE,
COL_ANGLE_AVG,
COL_AVG_FILTERED
} coltype;
/* More intuitive rounding */
static void rnd_func(sqlite3_context* context, int argc, sqlite3_value** argv) {
double x = argc == 1 ? 1 : exp(sqlite3_value_double(argv[1]) * log(10));
sqlite3_result_double(context, round(sqlite3_value_double(argv[0]) * x) / x);
}
/* Horizontal average: Add non-NULL columns */
static void havg_func(sqlite3_context* context, int argc, sqlite3_value** argv) {
double sum = 0;
int count = 0;
while (--argc >= 0)
if (sqlite3_value_type(argv[argc]) != SQLITE_NULL) {
sum += sqlite3_value_double(argv[argc]);
count++;
}
count ? sqlite3_result_double(context, sum / count) : sqlite3_result_null(context);
}
static void havg_angle_func(sqlite3_context* context, int argc, sqlite3_value** argv) {
double sumy = 0, sumx = 0;
int count = 0;
while (--argc >= 0)
if (sqlite3_value_type(argv[argc]) != SQLITE_NULL) {
double val = sqlite3_value_double(argv[argc]) * M_PI / 180;
sumx += cos(val);
sumy += sin(val);
count++;
}
count ? sqlite3_result_double(context, fmod(atan2(sumy, sumx) * 180 / M_PI + 360, 360))
: sqlite3_result_null(context);
}
static void hexagesimal_func(sqlite3_context* context, int argc, sqlite3_value** argv) {
int decimals = argc == 1 ? 0 : sqlite3_value_int(argv[1]);
int sign = sqlite3_value_double(argv[0]) < 0 ? -1 : 1;
double angle = fabs(sqlite3_value_double(argv[0]));
double deg = floor(angle);
double min = floor((angle - deg) * 60);
double sec = ((angle - deg) * 60 - min) * 60 - 0.5 / pow(10, decimals);
if (min < 0)
min = 0;
if (min >= 60)
min = 60 - DBL_MIN;
if (sec < 0)
sec = 0;
if (sec >= 60)
sec = 60 - DBL_MIN;
char* s = sqlite3_mprintf("%s%%d%c%c %%02d\' %%0%d.%df\"", sign == -1 ? "-" : "", 0xc2, 0xb0,
decimals + 2 + !!decimals, decimals);
char* buf = sqlite3_mprintf(s, (int)deg, (int)min, sec);
sqlite3_free(s);
sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT);
sqlite3_free(buf);
}
typedef struct {
double sumx;
double sumy;
double lastvalue;
double lasttimestamp;
int init;
} avg_angle_priv;
/* Calculate optionally time weighted and optionally speed weighted average of direction */
static void avg_angle_step(sqlite3_context* context, int argc, sqlite3_value** argv) {
avg_angle_priv* p = sqlite3_aggregate_context(context, sizeof(avg_angle_priv));
double val1 = sqlite3_value_double(argv[0]) * M_PI / 180;
if ((argc == 1 || p->init) && sqlite3_value_type(argv[0]) != SQLITE_NULL &&
(argc == 1 || sqlite3_value_type(argv[1]) != SQLITE_NULL)) {
if (argc == 1) {
p->sumx += cos(val1);
p->sumy += sin(val1);
} else {
double val2 = sqlite3_value_double(argv[1]) - p->lasttimestamp;
val2 *= (argc == 3 ? sqlite3_value_double(argv[2]) : 1);
double x = cos(val1) + cos(p->lastvalue);
double y = sin(val1) + sin(p->lastvalue);
double len = sqrt(x * x + y * y);
val2 /= len;
if (val1 != p->lastvalue) {
/* The weight is the distance along the arc */
double a = val1;
double b = p->lastvalue;
if (a - b > M_PI)
b += 2 * M_PI;
if (b - a > M_PI)
a += 2 * M_PI;
double xx = cos(a) - cos(b);
double yy = sin(a) - sin(b);
val2 *= sqrt(xx * xx + yy * yy) / fabs(a - b);
}
p->sumx += x * val2;
p->sumy += y * val2;
}
}
if (!p->init)
p->init = 1;
p->lastvalue = val1;
if (argc > 1)
p->lasttimestamp = sqlite3_value_double(argv[1]);
}
static void avg_angle_finalize(sqlite3_context* context) {
avg_angle_priv* p = sqlite3_aggregate_context(context, 0);
p && p->init && p->init
? sqlite3_result_double(context, fmod(atan2(p->sumy, p->sumx) * 180 / M_PI + 360, 360))
: sqlite3_result_null(context);
}
typedef struct {
double sum;
double sumx;
double sumy;
double total;
double lastvalue;
double lasttimestamp;
int init;
} avg_priv;
/* Calculate time weighted average */
static void avg_step(sqlite3_context* context, int argc, sqlite3_value** argv) {
avg_priv* p = sqlite3_aggregate_context(context, sizeof(avg_priv));
if (!p->init)
p->init = 1;
else if (sqlite3_value_type(argv[0]) != SQLITE_NULL &&
sqlite3_value_type(argv[1]) != SQLITE_NULL) {
p->sum += (sqlite3_value_double(argv[0]) + p->lastvalue) / 2 *
(sqlite3_value_double(argv[1]) - p->lasttimestamp);
p->total += sqlite3_value_double(argv[1]) - p->lasttimestamp;
}
p->lastvalue = sqlite3_value_double(argv[0]);
p->lasttimestamp = sqlite3_value_double(argv[1]);
}
static void avg_finalize(sqlite3_context* context) {
avg_priv* p = sqlite3_aggregate_context(context, 0);
p && p->init && p->total > 0 ? sqlite3_result_double(context, p->sum / p->total)
: sqlite3_result_null(context);
}
typedef struct {
double val;
double timestamp;
int init;
} firstlast_priv;
/* Find the first non-NULL value */
static void first_step(sqlite3_context* context, int argc, sqlite3_value** argv) {
firstlast_priv* p = sqlite3_aggregate_context(context, sizeof(firstlast_priv));
if (sqlite3_value_type(argv[0]) != SQLITE_NULL &&
(!p->init || sqlite3_value_double(argv[1]) < p->timestamp)) {
p->val = sqlite3_value_double(argv[0]);
p->timestamp = sqlite3_value_double(argv[1]);
p->init = 1;
}
}
/* Find the last non-NULL value */
static void last_step(sqlite3_context* context, int argc, sqlite3_value** argv) {
firstlast_priv* p = sqlite3_aggregate_context(context, sizeof(firstlast_priv));
if (sqlite3_value_type(argv[0]) != SQLITE_NULL &&
(!p->init || sqlite3_value_double(argv[1]) > p->timestamp)) {
p->val = sqlite3_value_double(argv[0]);
p->timestamp = sqlite3_value_double(argv[1]);
p->init = 1;
}
}
static void firstlast_finalize(sqlite3_context* context) {
firstlast_priv* p = sqlite3_aggregate_context(context, 0);
p && p->init ? sqlite3_result_double(context, p->val) : sqlite3_result_null(context);
}
typedef struct {
sqlite3_vtab vtab; // Must come first
sqlite3* db; // Database
const char* table; // Name of virtual table
char** tables; // List of table names to join
int* tablename_lens; // Their lengths
int tablenum; // Number of tables
sqlite_int64* table_start; // Their lowest timestamp
sqlite_int64* table_end; // Their highest timestamp
char* timestamp; // Name of the timestamp column
int timestamp_len; // Its length
int timestamp_idx; // Its column index in joined table
int cols; // Number of columns in joined table
int* coltypes; // The type of each column
char** colnames; // Names of the columns
char* selection; // Columns to select
int context; // Maximum context range to allow calculations near ends
int* contexts; // List of context ranges
char** context_names; // List of context names
int contexts_num; // Number of context ranges
int filter; // Percentage to filter away.
int subsample; // How much to subsample
} interpolate_vtab;
typedef struct {
sqlite3_vtab_cursor cur; // Must come first
sqlite3_stmt* stmt; // SQL statement that the cursor works on
int eof; // End flag
double timestamp; // Current timestamp
int timestamp_order; // Query timestamp order (1: ascending, -1: descending)
double** context_values; // Circular buffer holding rows before and after current row
int context_index; // Current position in buffer
int context_end; // Wrap position in buffer
double* timestamp_min; // Lowest timestamp for non-null value in source column
struct {
double v, t;
double min, max;
int sorted;
int j[3];
} * last; // Used for moving averages
int stmt_done; // Flag telling whether there is more rows left in SQL query
struct {
int op;
double rval;
} * constraints; // List of constraints on the timestamp
int constraints_num; // Number of contraints on the timestamp
int temptable; // Random number used to create a unique name for temporary table
int cols; // Number of cols in query, unknown until completion of first row
int row_context; // Context rows to allow calculations near ends
double min, max; // For filtering.
} interpolate_cursor;
/* Find the index of the primary key */
static int find_primary_key(sqlite3* db, const char* table, int* pk) {
int j = 0;
sqlite3_stmt* stmt;
char* buf = sqlite3_mprintf("PRAGMA table_info(%s)", table);
*pk = -1;
int rc = sqlite3_prepare_v2(db, buf, -1, &stmt, NULL);
sqlite3_free(buf);
if (rc != SQLITE_OK)
return rc;
do {
rc = sqlite3_step(stmt);
if (rc == SQLITE_DONE)
break;
if (rc != SQLITE_ROW)
return rc;
if (sqlite3_column_int64(stmt, 5) &&
!sqlite3_strnicmp((const char*)sqlite3_column_text(stmt, 2), "INTEGER", 7)) {
*pk = j;
break;
}
j++;
} while (1);
sqlite3_finalize(stmt);
return *pk != -1 ? SQLITE_OK : SQLITE_MISUSE;
}
static int concat_strings(char** bufs, int j, char** dest) {
int i, l = 0;
for (i = 0; i < j; i++)
l += strlen(bufs[i]);
*dest = sqlite3_malloc(l + 1);
**dest = 0;
if (!*dest)
return SQLITE_NOMEM;
for (i = l = 0; i < j; i++) {
strcpy(*dest + l, bufs[i]);
l += strlen(bufs[i]);
sqlite3_free(bufs[i]);
}
sqlite3_free(bufs);
return SQLITE_OK;
}
/* Parse arguments and create virtual table */
static int interpolate_connect(sqlite3* db,
void* pAux,
int argc,
const char* const* argv,
sqlite3_vtab** ppVtab,
char** pzErr) {
interpolate_vtab* v;
sqlite3_stmt *stmt, *stmt2;
int i, j, k;
int cols;
int* coltypes;
int rc;
int numbers[argc];
int contexts[argc];
const char* names[argc];
/* Argument check */
if (argc < 4)
return SQLITE_MISUSE;
*pzErr = 0;
memset(numbers, 0, sizeof(numbers));
memset(names, 0, sizeof(names));
v = sqlite3_malloc(sizeof(*v));
*ppVtab = (sqlite3_vtab*)v;
if (!v)
return SQLITE_NOMEM;
memset(v, 0, sizeof(*v));
v->context = 0;
v->filter = 0;
v->subsample = 0;
v->table = argv[2];
/* Filter or context arguments supplied? */
for (i = 3, k = 0; i < argc; i++) {
const char* p = argv[i];
for (j = 1; *p; p++)
j &= (*p <= '9' && *p >= '0') || *p <= ' ' || *p == '.' || *p == '+' || *p == '-' ||
((*p == '%' || *p == 's' || *p == 'm' || *p == 'h' || *p == 'd' || *p == '/' ||
*p == 'w' || *p == 'M' || *p == 'y') &&
!p[1]);
if (j) {
if (p[-1] == '%') {
v->filter = atol(argv[i]);
if (v->filter < 0)
v->filter = 0;
else if (v->filter > 100)
v->filter = 100;
} else if (p[-1] == '/') {
v->subsample = atol(argv[i]);
if (v->subsample < 0)
v->subsample = 0;
} else {
int factor;
switch (p[-1]) {
case 's':
factor = 1;
break;
case 'm':
factor = 60;
break;
case 'h':
factor = 3600;
break;
case 'd':
factor = 86400;
break;
case 'w':
factor = 604800;
break;
case 'M':
factor = 2592000;
break;
case 'y':
factor = 31536000;
break;
default:
factor = 1;
}
int c = atol(argv[i]) * factor;
if (c < 1)
c = 1;
if (c > v->context)
v->context = c;
for (j = 0; j < k && contexts[j] != c; j++)
;
if (j == k) {
names[k] = argv[i];
contexts[k++] = c;
}
}
numbers[i] = 1;
}
}
int dfl = 0;
if (!k) {
contexts[0] = v->context = 60 * 60 * 24;
k++;
dfl = 1;
}
v->contexts_num = k;
v->contexts = sqlite3_malloc(sizeof(int) * k);
v->context_names = sqlite3_malloc(sizeof(char*) * k);
if (!v->contexts || !v->context_names)
return SQLITE_NOMEM;
if (!dfl) {
for (i = 0; i < k; i++) {
int l = strlen(names[i]);
int c = 1;
v->contexts[i] = contexts[i];
v->context_names[i] = sqlite3_malloc(l + 2);
if (!v->context_names[i])
return SQLITE_NOMEM;
v->context_names[i][0] = '_';
for (j = 0; j < l; j++)
if (names[i][j] != ' ')
v->context_names[i][c++] = names[i][j];
v->context_names[i][c] = 0;
}
} else {
v->context_names[0] = sqlite3_malloc(1);
if (!v->context_names[0])
return SQLITE_NOMEM;
v->context_names[0][0] = 0;
v->contexts[0] = contexts[0];
}
/* Check whether arguments are tables or column names */
int found[argc];
int last;
for (last = argc - 1; numbers[last] && last >= 0; last--)
;
int al[argc];
for (i = 0; i < argc; i++) {
al[i] = strlen(argv[i]);
found[i] = 0;
}
for (i = 3, k = 0; i < argc && i <= last; i++) {
if (numbers[i])
continue;
char* buf = sqlite3_mprintf("SELECT * FROM %s", argv[i]);
rc = sqlite3_prepare_v2(db, buf, -1, &stmt, NULL);
sqlite3_free(buf);
if (rc != SQLITE_OK) {
sqlite3_finalize(stmt);
continue;
}
k++; // Identified as a table
sqlite3_step(stmt);
cols = sqlite3_column_count(stmt);
if (!cols)
return SQLITE_MISUSE;
int p;
for (p = 3; p < argc; p++) {
for (j = 0; j < cols; j++) {
const char* n = sqlite3_column_name(stmt, j);
int nl = strlen(n);
if (numbers[p] || p == i)
found[p] = -1;
else if (!sqlite3_strnicmp(argv[p], n, al[p] > nl ? al[p] : nl)) {
found[p]++;
break;
}
}
}
sqlite3_finalize(stmt);
}
/* Arguments that are not table, nor column, nor number? */
for (i = 3; i < argc; i++)
if (!found[i])
return SQLITE_MISUSE;
int first = 0;
/* Find column present in all tables */
for (i = 0; i < argc; i++) {
if (!first && found[i])
first = i;
if (found[i] == k) {
last = i;
break;
}
}
if (first) {
v->timestamp = sqlite3_malloc(al[last] + 1);
if (!v->timestamp)
return SQLITE_NOMEM;
v->timestamp_len = al[last];
strcpy(v->timestamp, argv[last]);
} else
k++;
v->tablenum = k;
if (!first || i == argc) {
sqlite3_free(v->timestamp);
v->timestamp = 0;
/* Look up primary key */
for (j = 3, i = 0; i < v->tablenum; i++, j++) {
int pk;
while (numbers[j] && j < argc)
j++;
rc = find_primary_key(db, argv[j], &pk);
if (rc != SQLITE_OK)
continue;
char* buf = sqlite3_mprintf("SELECT * FROM %s", argv[j]);
rc = sqlite3_prepare_v2(db, buf, -1, &stmt, NULL);
sqlite3_free(buf);
if (rc != SQLITE_OK)
return rc;
sqlite3_step(stmt);
cols = sqlite3_column_count(stmt);
if (!cols)
return SQLITE_MISUSE;
const char* pkname = sqlite3_column_name(stmt, pk);
if (!v->timestamp) {
v->timestamp_len = strlen(pkname);
v->timestamp = sqlite3_malloc(v->timestamp_len + 1);
if (!v->timestamp)
return SQLITE_NOMEM;
strcpy(v->timestamp, pkname);
} else {
int len = strlen(v->timestamp);
if (sqlite3_strnicmp(pkname, v->timestamp,
v->timestamp_len > len ? v->timestamp_len : len))
return SQLITE_MISUSE;
}
sqlite3_finalize(stmt);
}
}
if (!v->timestamp)
return SQLITE_MISUSE;
v->tables = sqlite3_malloc(v->tablenum * sizeof(char*));
v->tablename_lens = sqlite3_malloc(v->tablenum * sizeof(int));
v->table_start = sqlite3_malloc(v->tablenum * sizeof(double));
v->table_end = sqlite3_malloc(v->tablenum * sizeof(double));
if (!v->table_start || !v->table_end || !v->tables || !v->tablename_lens)
return SQLITE_NOMEM;
v->db = db;
for (j = 3, i = 0; i < v->tablenum; i++, j++) {
while (numbers[j] && j < argc)
j++;
v->tables[i] = sqlite3_malloc(strlen(argv[j]) + 1);
if (!v->tables[i])
return SQLITE_NOMEM;
strcpy(v->tables[i], argv[j]);
v->tablename_lens[i] = strlen(v->tables[i]);
}
for (i = j = 0; i < argc; i++)
j += found[i] > 0;
if (j < 2)
v->selection = sqlite3_mprintf("*");
else {
for (i = j = 0; i < argc; i++)
if (found[i] > 0)
j += strlen(argv[i]) + 2;
v->selection = sqlite3_malloc(j + 1);
if (!v->selection)
return SQLITE_NOMEM;
for (i = j = 0; i < argc; i++)
if (found[i] > 0) {
strcpy(v->selection + j, argv[i]);
j += al[i];
strcpy(v->selection + j, ", ");
j += 2;
}
v->selection[j - 2] = 0;
}
/* Copy description from source table. We don't need a union of the
timestamps since we're only interested in the column information */
char** bufs = sqlite3_malloc((2 + v->tablenum) * sizeof(char*));
if (!bufs)
return SQLITE_NOMEM;
j = 0;
bufs[j++] = sqlite3_mprintf("SELECT %s FROM (SELECT %s FROM %s LIMIT 1) ", v->selection,
v->timestamp, v->tables[0]);
for (i = 0; i < v->tablenum; i++)
bufs[j++] = sqlite3_mprintf("NATURAL JOIN %s ", v->tables[i]);
bufs[j++] = sqlite3_mprintf("\n");
char* select;
rc = concat_strings(bufs, j, &select);
if (rc != SQLITE_OK)
return rc;
// printf("%s: %s", v->table, select);
rc = sqlite3_prepare_v2(db, select, -1, &stmt, NULL);
if (rc != SQLITE_OK)
return rc;
sqlite3_free(select);
cols = sqlite3_column_count(stmt);
coltypes = sqlite3_malloc(sizeof(int) * cols);
if (!coltypes) {
sqlite3_finalize(stmt);
return SQLITE_NOMEM;
}
/* Find smallest and largest timestamps in each table */
for (i = 0; i < v->tablenum; i++) {
/* For some reason it's much faster to select separately than ask for both in one query */
char* buf = sqlite3_mprintf(
"SELECT (SELECT %s FROM %s ORDER BY %s LIMIT 1), (SELECT %s FROM %s ORDER BY %s DESC "
"LIMIT 1)",
v->timestamp, v->tables[i], v->timestamp, v->timestamp, v->tables[i], v->timestamp);
// printf("%s: %s\n", v->table, buf);
rc = sqlite3_prepare_v2(db, buf, -1, &stmt2, NULL);
sqlite3_free(buf);
if (rc != SQLITE_OK) {
sqlite3_finalize(stmt2);
return rc;
}
rc = sqlite3_step(stmt2);
if (rc != SQLITE_ROW) {
sqlite3_finalize(stmt2);
return rc;
}
v->table_start[i] = sqlite3_column_double(stmt2, 0);
v->table_end[i] = sqlite3_column_double(stmt2, 1);
sqlite3_finalize(stmt2);
}
int extras = v->filter ? 6 : 5;
/* Fill out column names */
v->colnames = sqlite3_malloc(cols * (1 + extras * v->contexts_num) * sizeof(char*));
for (i = 0; i < cols; i++) {
int l = strlen(sqlite3_column_name(stmt, i));
const char* n = sqlite3_column_name(stmt, i);
v->colnames[i] = sqlite3_malloc(l + 1);
if (!v->colnames[i])
return SQLITE_NOMEM;
strcpy(v->colnames[i], n);
for (j = 0; j < v->contexts_num; j++) {
l += strlen(v->context_names[j]);
v->colnames[i + cols * 1 + j * cols * extras] = sqlite3_malloc(l + 6);
v->colnames[i + cols * 2 + j * cols * extras] = sqlite3_malloc(l + 6);
v->colnames[i + cols * 3 + j * cols * extras] = sqlite3_malloc(l + 6);
v->colnames[i + cols * 4 + j * cols * extras] = sqlite3_malloc(l + 8);
v->colnames[i + cols * 5 + j * cols * extras] = sqlite3_malloc(l + 12);
if (v->filter)
v->colnames[i + cols * 6 + j * cols * extras] = sqlite3_malloc(l + 15);
if (!v->colnames[i + cols * 1 + j * cols * extras] ||
!v->colnames[i + cols * 2 + j * cols * extras] ||
!v->colnames[i + cols * 3 + j * cols * extras] ||
!v->colnames[i + cols * 4 + j * cols * extras] ||
!v->colnames[i + cols * 5 + j * cols * extras] ||
(v->filter && !v->colnames[i + cols * 6 + j * cols * extras]))
return SQLITE_NOMEM;
sprintf(v->colnames[i + cols * 1 + j * cols * extras], "%s__avg%s", n,
v->context_names[j]);
sprintf(v->colnames[i + cols * 2 + j * cols * extras], "%s__min%s", n,
v->context_names[j]);
sprintf(v->colnames[i + cols * 3 + j * cols * extras], "%s__max%s", n,
v->context_names[j]);