Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove tree validations and introduce ParameterizedQuery #3230

Merged
merged 21 commits into from
Jan 17, 2019
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4a557c7
stop testing `collect_query_parameters`, it's an implementation detail
Dec 27, 2018
856e61c
add tests for `missing_query_params`
Dec 27, 2018
5bd751f
rename SQLQuery -> ParameterizedSqlQuery
Dec 30, 2018
f43c5f7
rename sql_query.py to parameterized_query.py
Dec 30, 2018
b608c5b
split to parameterized queries and parameterized SQL queries, where
Dec 30, 2018
ced3239
Merge branch 'tests-for-find-missing-params' into run-tree-validation…
Dec 30, 2018
8798475
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Dec 31, 2018
e49884d
move missing parameter detection to ParameterizedQuery
Dec 31, 2018
07df144
get rid of some old code
Jan 1, 2019
36b3045
fix tests
Jan 1, 2019
a72f781
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 1, 2019
360f85c
set syntax to `custom`
Jan 1, 2019
5239eba
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 1, 2019
a94f5d9
Merge branch 'run-tree-validations-only-on-sql-dialects' of github.co…
Jan 1, 2019
b0b7164
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 3, 2019
23af64e
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 6, 2019
a136cd0
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 13, 2019
639c76f
revert the max-age-related refactoring
Jan 16, 2019
c18675a
Merge branch 'master' into run-tree-validations-only-on-sql-dialects
Jan 16, 2019
6abcedf
👋 tree validations 😢
Jan 16, 2019
5a12c23
BaseQueryRunner is no longer a factory for ParameterizedQuery, for now
Jan 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 1 addition & 53 deletions redash/handlers/embed.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,14 @@
from __future__ import absolute_import
import logging
import time

from flask import request

from .authentication import current_org
from flask_login import current_user, login_required
from flask_restful import abort
from redash import models, utils
from redash import models
from redash.handlers import routes
from redash.handlers.base import (get_object_or_404, org_scoped_rule,
record_event)
from redash.utils import find_missing_params
from redash.handlers.static import render_index
from redash.utils import gen_query_hash, mustache_render


#
# Run a parameterized query synchronously and return the result
# DISCLAIMER: Temporary solution to support parameters in queries. Should be
# removed once we refactor the query results API endpoints and handling
# on the client side. Please don't reuse in other API handlers.
#
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

query_text = mustache_render(query_text, parameter_values)

if max_age <= 0:
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

query_hash = gen_query_hash(query_text)

if query_result:
logging.info("Returning cached result for query %s" % query_hash)
return query_result.data

try:
started_at = time.time()
data, error = data_source.query_runner.run_query(query_text, current_user)

if error:
return None
# update cache
if max_age > 0:
run_time = time.time() - started_at
query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source.id,
query_hash, query_text, data,
run_time, utils.utcnow())

models.db.session.commit()
return data
except Exception:
if max_age > 0:
abort(404, message="Unable to get result from the database, and no cached query result found.")
else:
abort(503, message="Unable to get result from the database.")
return None


@routes.route(org_scoped_rule('/embed/query/<query_id>/visualization/<visualization_id>'), methods=['GET'])
Expand Down
125 changes: 57 additions & 68 deletions redash/handlers/query_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,90 +10,59 @@
require_permission, view_only)
from redash.tasks import QueryTask, record_event
from redash.tasks.queries import enqueue_query
from redash.utils import (collect_parameters_from_request, find_missing_params, gen_query_hash, json_dumps, utcnow)
from redash.utils.sql_query import SQLInjectionError, SQLQuery
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow)
from redash.utils.parameterized_query import SQLInjectionError


def error_response(message):
return {'job': {'status': 4, 'error': message}}, 400


def apply_parameters(template, parameters, data_source):
query = SQLQuery(template).apply(parameters)

# for now we only log `SQLInjectionError` to detect false positives
try:
text = query.text
except SQLInjectionError:
record_event({
'action': 'sql_injection',
'object_type': 'query',
'query': template,
'parameters': parameters,
'timestamp': time.time(),
'org_id': data_source.org_id
})
except Exception as e:
logging.info(u"Failed applying parameters for query %s: %s", gen_query_hash(query.query), e.message)
finally:
text = query.query

return text


#
# Run a parameterized query synchronously and return the result
# DISCLAIMER: Temporary solution to support parameters in queries. Should be
# removed once we refactor the query results API endpoints and handling
# on the client side. Please don't reuse in other API handlers.
#
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

query_text = apply_parameters(query_text, parameter_values, data_source)
parameterized_query_class = data_source.query_runner.parameterized_query_class
parameterized_query = parameterized_query_class(query_text).apply(parameter_values)

if max_age <= 0:
arikfr marked this conversation as resolved.
Show resolved Hide resolved
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
if parameterized_query.missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(parameterized_query.missing_params)))

query_text = parameterized_query.query
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that I'm not calling parameterized_query.text here, hence there's no attempt to detect SQL injection for this case.

query_hash = gen_query_hash(query_text)

if query_result:
if max_age:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
logging.info("Returning cached result for query %s" % query_hash)
return query_result

try:
started_at = time.time()
data, error = data_source.query_runner.run_query(query_text, current_user)

if error:
logging.info('got bak error')
logging.info(error)
else:
try:
started_at = time.time()
data, error = data_source.query_runner.run_query(query_text, current_user)

if error:
logging.info('got bak error')
logging.info(error)
return None

run_time = time.time() - started_at
query_result, _ = models.QueryResult.store_result(data_source.org_id, data_source, query_hash,
query_text, data, run_time, utcnow())

models.db.session.commit()
return query_result
except Exception:
if max_age > 0:
abort(404, message="Unable to get result from the database, and no cached query result found.")
else:
abort(503, message="Unable to get result from the database.")
return None

run_time = time.time() - started_at
query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source,
query_hash, query_text, data,
run_time, utcnow())

models.db.session.commit()
return query_result
except Exception as e:
if max_age > 0:
abort(404, message="Unable to get result from the database, and no cached query result found.")
else:
abort(503, message="Unable to get result from the database.")
return None


def run_query(data_source, parameter_values, query_text, query_id, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
return error_response(u'Missing parameter value for: {}'.format(u", ".join(missing_params)))

if data_source.paused:
if data_source.pause_reason:
message = '{} is paused ({}). Please try later.'.format(data_source.name, data_source.pause_reason)
Expand All @@ -102,17 +71,37 @@ def run_query(data_source, parameter_values, query_text, query_id, max_age=0):

return error_response(message)

query_text = apply_parameters(query_text, parameter_values, data_source)
parameterized_query_class = data_source.query_runner.parameterized_query_class
parameterized_query = parameterized_query_class(query_text).apply(parameter_values)

if max_age == 0:
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
if parameterized_query.missing_params:
return error_response(u'Missing parameter value for: {}'.format(u", ".join(parameterized_query.missing_params)))

if query_result:
# for now we only log `SQLInjectionError` to detect false positives
try:
query_text = parameterized_query.text
except SQLInjectionError:
record_event({
'action': 'sql_injection',
'object_type': 'query',
'query': query_text,
'parameters': parameter_values,
'timestamp': time.time(),
'org_id': data_source.org_id
})
query_text = parameterized_query.query
except Exception as e:
logging.info(u"Failed applying parameters for query %s: %s", gen_query_hash(parameterized_query.query), e.message)
query_text = parameterized_query.query

if max_age:
arikfr marked this conversation as resolved.
Show resolved Hide resolved
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
return {'query_result': query_result.to_dict()}
else:
job = enqueue_query(query_text, data_source, current_user.id, metadata={"Username": current_user.email, "Query ID": query_id})
job = enqueue_query(query_text, data_source, current_user.id, metadata={
"Username": current_user.email,
"Query ID": query_id
})
return {'job': job.to_dict()}


Expand Down
8 changes: 8 additions & 0 deletions redash/query_runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from redash import settings
from redash.utils import json_loads
from redash.utils.parameterized_query import ParameterizedSqlQuery, ParameterizedQuery

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -115,6 +116,13 @@ def _run_query_internal(self, query):
raise Exception("Failed running query [%s]." % query)
return json_loads(results)['rows']

@property
def parameterized_query_class(self):
if self.syntax == 'sql':
return ParameterizedSqlQuery
else:
return ParameterizedQuery

@classmethod
def to_dict(cls):
return {
Expand Down
3 changes: 3 additions & 0 deletions redash/query_runner/google_spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def request(self, *args, **kwargs):


class GoogleSpreadsheet(BaseQueryRunner):
def __init__(self, configuration):
super(GoogleSpreadsheet, self).__init__(configuration)
self.syntax = 'custom'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you check that the UI doesn't blow with syntax names it doesn't recognize?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this only affects query formatting. I tested it by changing BaseQueryRunner's syntax to custom and noticed no explosions other than query formatting.


@classmethod
def annotate_query(cls):
Expand Down
1 change: 1 addition & 0 deletions redash/query_runner/graphite.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def annotate_query(cls):

def __init__(self, configuration):
super(Graphite, self).__init__(configuration)
self.syntax = 'custom'

if "username" in self.configuration and self.configuration["username"]:
self.auth = (self.configuration["username"], self.configuration["password"])
Expand Down
37 changes: 1 addition & 36 deletions redash/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pystache
import pytz
import simplejson
from funcy import distinct, select_values
from funcy import select_values
from redash import settings
from sqlalchemy.orm.query import Query

Expand Down Expand Up @@ -153,41 +153,6 @@ def writerows(self, rows):
self.writerow(row)


def _collect_key_names(nodes):
keys = []
for node in nodes._parse_tree:
if isinstance(node, pystache.parser._EscapeNode):
keys.append(node.key)
elif isinstance(node, pystache.parser._SectionNode):
keys.append(node.key)
keys.extend(_collect_key_names(node.parsed))

return distinct(keys)


def collect_query_parameters(query):
nodes = pystache.parse(query)
keys = _collect_key_names(nodes)
return keys


def parameter_names(parameter_values):
names = []
for key, value in parameter_values.iteritems():
if isinstance(value, dict):
for inner_key in value.keys():
names.append(u'{}.{}'.format(key, inner_key))
else:
names.append(key)

return names


def find_missing_params(query_text, parameter_values):
query_parameters = set(collect_query_parameters(query_text))
return set(query_parameters) - set(parameter_names(parameter_values))


def collect_parameters_from_request(args):
parameters = {}

Expand Down
Loading