Skip to content

Commit

Permalink
Merge pull request #10698 from rouault/ogr_sqlite_median_mode
Browse files Browse the repository at this point in the history
SQLite SQL dialect: add MEDIAN, PERCENTILE, PERCENTILE_CONT and MODE ordered-set aggregate functions
  • Loading branch information
rouault committed Sep 16, 2024
2 parents ce6bae8 + 86562c6 commit 60eccc5
Show file tree
Hide file tree
Showing 3 changed files with 419 additions and 2 deletions.
117 changes: 117 additions & 0 deletions autotest/ogr/ogr_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4149,6 +4149,123 @@ def test_ogr_sqlite_stddev():
assert f.GetField(1) == pytest.approx(0.5**0.5, rel=1e-15)


@pytest.mark.parametrize(
"input_values,expected_res",
[
([], None),
([1], 1),
([2.5, None, 1], 1.75),
([3, 2.2, 1], 2.2),
([1, "invalid"], None),
],
)
def test_ogr_sqlite_median(input_values, expected_res):
"""Test MEDIAN"""

ds = ogr.Open(":memory:", update=1)
ds.ExecuteSQL("CREATE TABLE test(v)")
for v in input_values:
ds.ExecuteSQL(
"INSERT INTO test VALUES (%s)"
% (
"NULL"
if v is None
else ("'" + v + "'")
if isinstance(v, str)
else str(v)
)
)
if expected_res is None and input_values:
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT MEDIAN(v) FROM test"):
pass
else:
with ds.ExecuteSQL("SELECT MEDIAN(v) FROM test") as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0) == pytest.approx(expected_res)
with ds.ExecuteSQL("SELECT PERCENTILE(v, 50) FROM test") as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0) == pytest.approx(expected_res)
with ds.ExecuteSQL("SELECT PERCENTILE_CONT(v, 0.5) FROM test") as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0) == pytest.approx(expected_res)


def test_ogr_sqlite_percentile():
"""Test PERCENTILE"""

ds = ogr.Open(":memory:", update=1)
ds.ExecuteSQL("CREATE TABLE test(v)")
ds.ExecuteSQL("INSERT INTO test VALUES (5),(6),(4),(7),(3),(8),(2),(9),(1),(10)")

with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE(v, 'invalid') FROM test"):
pass
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE(v, -0.1) FROM test"):
pass
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE(v, 100.1) FROM test"):
pass
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE(v, v) FROM test"):
pass


def test_ogr_sqlite_percentile_cont():
"""Test PERCENTILE_CONT"""

ds = ogr.Open(":memory:", update=1)
ds.ExecuteSQL("CREATE TABLE test(v)")
ds.ExecuteSQL("INSERT INTO test VALUES (5),(6),(4),(7),(3),(8),(2),(9),(1),(10)")

with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE_CONT(v, 'invalid') FROM test"):
pass
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE_CONT(v, -0.1) FROM test"):
pass
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT PERCENTILE_CONT(v, 1.1) FROM test"):
pass


@pytest.mark.parametrize(
"input_values,expected_res",
[
([], None),
([1, 2, None, 3, 2], 2),
(["foo", "bar", "baz", "bar"], "bar"),
([1, "foo", 2, "foo", "bar"], "foo"),
([1, "foo", 2, "foo", 1], "foo"),
],
)
def test_ogr_sqlite_mode(input_values, expected_res):
"""Test MODE"""

ds = ogr.Open(":memory:", update=1)
ds.ExecuteSQL("CREATE TABLE test(v)")
for v in input_values:
ds.ExecuteSQL(
"INSERT INTO test VALUES (%s)"
% (
"NULL"
if v is None
else ("'" + v + "'")
if isinstance(v, str)
else str(v)
)
)
if expected_res is None and input_values:
with pytest.raises(Exception), gdaltest.error_handler():
with ds.ExecuteSQL("SELECT MODE(v) FROM test"):
pass
else:
with ds.ExecuteSQL("SELECT MODE(v) FROM test") as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0) == expected_res


def test_ogr_sqlite_run_deferred_actions_before_start_transaction():

ds = ogr.Open(":memory:", update=1)
Expand Down
14 changes: 12 additions & 2 deletions doc/source/user/sql_sqlite_dialect.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,18 @@ Statistics functions
In addition to standard COUNT(), SUM(), AVG(), MIN(), MAX(), the following
aggregate functions are available:

- STDDEV_POP: (GDAL >= 3.10) numerical population standard deviation.
- STDDEV_SAMP: (GDAL >= 3.10) numerical `sample standard deviation <https://en.wikipedia.org/wiki/Standard_deviation#Sample_standard_deviation>`__
- ``STDDEV_POP(numeric_value)``: (GDAL >= 3.10) numerical population standard deviation.
- ``STDDEV_SAMP(numeric_value)``: (GDAL >= 3.10) numerical `sample standard deviation <https://en.wikipedia.org/wiki/Standard_deviation#Sample_standard_deviation>`__

Ordered-set aggregate functions
+++++++++++++++++++++++++++++++

The following aggregate functions are available. Note that they require to allocate an amount of memory proportional to the number of selected rows (for ``MEDIAN``, ``PERCENTILE`` and ``PERCENTILE_CONT``) or to the number of values (for ``MODE``).

- ``MEDIAN(numeric_value)``: (GDAL >= 3.10) (continuous) median (equivalent to ``PERCENTILE(numeric_value, 50)``). NULL values are ignored.
- ``PERCENTILE(numeric_value, percentage)``: (GDAL >= 3.10) (continuous) percentile, with percentage between 0 and 100 (equivalent to ``PERCENTILE_CONT(numeric_value, percentage / 100)``). NULL values are ignored.
- ``PERCENTILE_CONT(numeric_value, fraction)``: (GDAL >= 3.10) (continuous) percentile, with fraction between 0 and 1. NULL values are ignored.
- ``MODE(value)``: (GDAL >= 3.10): mode, i.e. most frequent input value (strings and numeric values are supported), arbitrarily choosing the first one if there are multiple equally-frequent results. NULL values are ignored.

Spatialite SQL functions
++++++++++++++++++++++++
Expand Down
Loading

0 comments on commit 60eccc5

Please sign in to comment.