diff --git a/src/query/allowed_objects.c b/src/query/allowed_objects.c index c626c0b9..8e78f2ba 100644 --- a/src/query/allowed_objects.c +++ b/src/query/allowed_objects.c @@ -1,6 +1,7 @@ #include "postgres.h" #include "access/sysattr.h" +#include "utils/fmgroids.h" #include "utils/fmgrtab.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -57,6 +58,12 @@ static const FunctionByName g_allowed_builtins[] = { /* date_trunc */ (FunctionByName){.name = "timestamptz_trunc", .primary_arg = 1}, (FunctionByName){.name = "timestamp_trunc", .primary_arg = 1}, + /* extract & date_part */ + (FunctionByName){.name = "extract_date", .primary_arg = 1}, + (FunctionByName){.name = "extract_timestamp", .primary_arg = 1}, + (FunctionByName){.name = "extract_timestamptz", .primary_arg = 1}, + (FunctionByName){.name = "timestamp_part", .primary_arg = 1}, + (FunctionByName){.name = "timestamptz_part", .primary_arg = 1}, /**/ }; @@ -73,7 +80,19 @@ static const char *const g_implicit_range_builtins_untrusted[] = { /* Some allowed functions don't appear in the builtins catalog, so we must allow them manually by OID. */ #define F_NUMERIC_ROUND_INT 1708 -static const FunctionByOid g_allowed_builtins_extra[] = {(FunctionByOid){.funcid = F_NUMERIC_ROUND_INT, .primary_arg = 0}}; +/* + * `date_part` for `date` is a SQL builtin and doesn't show up in `fmgr_isbuiltin`. + * PG 14 provides the `F_DATE_PART_TEXT_DATE` define, but PG 13 doesn't, so we define it ourselves.
+ */ +#if PG_MAJORVERSION_NUM < 14 +#define F_DATE_PART_TEXT_DATE 1384 +#endif + +static const FunctionByOid g_allowed_builtins_extra[] = { + (FunctionByOid){.funcid = F_NUMERIC_ROUND_INT, .primary_arg = 0}, + (FunctionByOid){.funcid = F_DATE_PART_TEXT_DATE, .primary_arg = 1}, + /**/ +}; typedef struct AllowedCols { diff --git a/src/query/anonymization.c b/src/query/anonymization.c index 5e68c817..80295a68 100644 --- a/src/query/anonymization.c +++ b/src/query/anonymization.c @@ -574,6 +574,15 @@ typedef struct CollectMaterialContext char material[MAX_SEED_MATERIAL_SIZE]; } CollectMaterialContext; +static void normalize_function_name(char *func_name) +{ + if (strcmp(func_name, "date_part") == 0) + { + /* No need to reallocate `func_name`: the normalized name is shorter, so it fits in place. */ + strcpy(func_name, "extract"); + } +} + static bool collect_seed_material(Node *node, CollectMaterialContext *context) { if (node == NULL) @@ -587,7 +596,7 @@ static bool collect_seed_material(Node *node, CollectMaterialContext *context) char *func_name = get_func_name(func_expr->funcid); if (func_name) { - /* TODO: Normalize function names.
*/ + normalize_function_name(func_name); append_seed_material(context->material, func_name, ','); pfree(func_name); } diff --git a/src/query/validation.c b/src/query/validation.c index 7b5baf22..14fd746f 100644 --- a/src/query/validation.c +++ b/src/query/validation.c @@ -188,6 +188,11 @@ static bool verify_aggregator(Node *node, void *context) aggoid != g_oid_cache.is_suppress_bin) FAILWITH_LOCATION(aggref->location, "Unsupported aggregate in query."); + if ((aggoid == g_oid_cache.sum_noise || + aggoid == g_oid_cache.avg_noise) && + TypeCategory(linitial_oid(aggref->aggargtypes)) == TYPCATEGORY_DATETIME) + FAILWITH_LOCATION(aggref->location, "Unsupported aggregate in query."); + if (aggoid == g_oid_cache.count_value || aggoid == g_oid_cache.count_value_noise || is_sum_oid(aggoid) || aggoid == g_oid_cache.sum_noise || is_avg_oid(aggoid) || aggoid == g_oid_cache.avg_noise) diff --git a/test/expected/datetime.out b/test/expected/datetime.out index 99bb8d0f..e6ff726a 100644 --- a/test/expected/datetime.out +++ b/test/expected/datetime.out @@ -85,3 +85,15 @@ SELECT count(*) FROM test_datetime WHERE date_trunc('year', ts) = '2012-01-01':: 11 (1 row) +SELECT count(*) FROM test_datetime WHERE extract(century from ts) = 21; + count +------- + 9 +(1 row) + +SELECT count(*) FROM test_datetime WHERE date_part('century', ts) = 21; + count +------- + 9 +(1 row) + diff --git a/test/expected/validation.out b/test/expected/validation.out index 3823eba9..24e8b2a2 100644 --- a/test/expected/validation.out +++ b/test/expected/validation.out @@ -126,14 +126,21 @@ GROUP BY 1, 2, 3, 4; -----------+-----------+-----------+----------- (0 rows) +-- `as extract` ensures that the column is aliased consistently in PG 13 and 14. 
SELECT date_trunc('year', last_seen), date_trunc('year', last_seen_tz), - date_trunc('year', birthday) + date_trunc('year', birthday), + extract(month from last_seen) as extract, + extract(month from last_seen_tz) as extract, + extract(month from birthday) as extract, + date_part('month', last_seen) as date_part, + date_part('month', last_seen_tz) as date_part, + date_part('month', birthday) as date_part FROM test_validation -GROUP BY 1, 2, 3; - date_trunc | date_trunc | date_trunc -------------+------------+------------ +GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9; + date_trunc | date_trunc | date_trunc | extract | extract | extract | date_part | date_part | date_part +------------+------------+------------+---------+---------+---------+-----------+-----------+----------- (0 rows) -- Allow all functions post-anonymization. @@ -403,6 +410,14 @@ SELECT diffix.count_histogram(city) FROM test_validation; ERROR: [PG_DIFFIX] count_histogram argument must be an AID column. LINE 1: SELECT diffix.count_histogram(city) FROM test_validation; ^ +SELECT diffix.sum_noise(last_seen) FROM test_validation; +ERROR: [PG_DIFFIX] Unsupported aggregate in query. +LINE 1: SELECT diffix.sum_noise(last_seen) FROM test_validation; + ^ +SELECT diffix.avg_noise(last_seen::date) FROM test_validation; +ERROR: [PG_DIFFIX] Unsupported aggregate in query. +LINE 1: SELECT diffix.avg_noise(last_seen::date) FROM test_validatio... + ^ -- Get rejected because only a subset of expressions is supported for defining buckets. SELECT COUNT(*) FROM test_validation GROUP BY LENGTH(city); ERROR: [PG_DIFFIX] Unsupported function used for generalization. @@ -444,6 +459,10 @@ SELECT date_trunc('year', lunchtime) FROM test_validation GROUP BY 1; ERROR: [PG_DIFFIX] Unsupported function used for generalization. LINE 1: SELECT date_trunc('year', lunchtime) FROM test_validation GR... ^ +SELECT extract(hour from lunchtime) FROM test_validation GROUP BY 1; +ERROR: [PG_DIFFIX] Unsupported function used for generalization. 
+LINE 1: SELECT extract(hour from lunchtime) FROM test_validation GRO... + ^ -- Get rejected because of averaging opportunity SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_validation GROUP BY 1; ERROR: [PG_DIFFIX] Unsupported function used for generalization. diff --git a/test/sql/datetime.sql b/test/sql/datetime.sql index 01482b86..d9eee5f3 100755 --- a/test/sql/datetime.sql +++ b/test/sql/datetime.sql @@ -56,3 +56,5 @@ SELECT tz, count(*) FROM test_datetime GROUP BY 1; -- Datetime filtering SELECT count(*) FROM test_datetime WHERE date_trunc('year', ts) = '2012-01-01'::timestamp; +SELECT count(*) FROM test_datetime WHERE extract(century from ts) = 21; +SELECT count(*) FROM test_datetime WHERE date_part('century', ts) = 21; diff --git a/test/sql/validation.sql b/test/sql/validation.sql index df9f3c43..1fdf9287 100755 --- a/test/sql/validation.sql +++ b/test/sql/validation.sql @@ -93,12 +93,19 @@ SELECT FROM test_validation GROUP BY 1, 2, 3, 4; +-- `as extract` ensures that the column is aliased consistently in PG 13 and 14. SELECT date_trunc('year', last_seen), date_trunc('year', last_seen_tz), - date_trunc('year', birthday) + date_trunc('year', birthday), + extract(month from last_seen) as extract, + extract(month from last_seen_tz) as extract, + extract(month from birthday) as extract, + date_part('month', last_seen) as date_part, + date_part('month', last_seen_tz) as date_part, + date_part('month', birthday) as date_part FROM test_validation -GROUP BY 1, 2, 3; +GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9; -- Allow all functions post-anonymization. 
SELECT 2 * length(city) FROM test_validation GROUP BY city; @@ -207,6 +214,8 @@ SELECT count(distinct least(id, 5)) FROM test_validation; SELECT count(id + 5) FROM test_validation; SELECT count(least(id, 5)) FROM test_validation; SELECT diffix.count_histogram(city) FROM test_validation; +SELECT diffix.sum_noise(last_seen) FROM test_validation; +SELECT diffix.avg_noise(last_seen::date) FROM test_validation; -- Get rejected because only a subset of expressions is supported for defining buckets. SELECT COUNT(*) FROM test_validation GROUP BY LENGTH(city); @@ -222,6 +231,7 @@ SELECT COUNT(*) FROM test_validation GROUP BY substr('aaaa', 1, 2); -- Get rejected because of lack of interval support SELECT date_trunc('year', lunchtime) FROM test_validation GROUP BY 1; +SELECT extract(hour from lunchtime) FROM test_validation GROUP BY 1; -- Get rejected because of averaging opportunity SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_validation GROUP BY 1;