Improve examples & related tests (#7773)

* [WIP] Improve load_examples

Related to #7472; longer term we will generate the examples by exporting
them into a tarball, as described in #7472. In the meantime, we need this
subset of the features:

* allow specifying an alternate database connection for the examples
* allow an --only-metadata flag on `load_examples` that loads only
  dashboard and chart definitions, without loading the actual data

* Improve logging

* Rename data->examples

* Only load data if the table does not already exist

* By default, do not reload existing data; add a --force flag

* Fix build

* Set published to true
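
For orientation, a minimal sketch of what the new options amount to, expressed
against the `load_examples_run` signature introduced in superset/cli.py below
(these are the internal equivalents of the `--only-metadata` and `--force` CLI
flags; it assumes an initialized Superset app context):

    # Python sketch; mirrors the click flags defined in superset/cli.py
    from superset.cli import load_examples_run

    # Default: load dashboard/chart metadata plus the backing example data
    load_examples_run(load_test_data=False)

    # --only-metadata: create dashboards, charts and table references only
    load_examples_run(load_test_data=False, only_metadata=True)

    # --force: reload data even when the example tables already exist
    load_examples_run(load_test_data=False, force=True)
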
mistercrunch authored Jul 17, 2019
1 parent 86fdceb commit d65b039
Showing 45 changed files with 583 additions and 491 deletions.
MANIFEST.in (2 changes: 1 addition & 1 deletion)

@@ -18,7 +18,7 @@ include NOTICE
 include LICENSE.txt
 graft licenses/
 include README.md
-recursive-include superset/data *
+recursive-include superset/examples *
 recursive-include superset/migrations *
 recursive-include superset/static *
 recursive-exclude superset/static/assets/docs *

superset/cli.py (57 changes: 34 additions & 23 deletions)

@@ -26,7 +26,7 @@
 from pathlib2 import Path
 import yaml
 
-from superset import app, appbuilder, data, db, security_manager
+from superset import app, appbuilder, db, examples, security_manager
 from superset.utils import core as utils, dashboard_import_export, dict_import_export
 
 config = app.config
@@ -46,6 +46,7 @@ def make_shell_context():
 def init():
     """Inits the Superset application"""
     utils.get_or_create_main_db()
+    utils.get_example_database()
     appbuilder.add_permissions(update_perms=True)
     security_manager.sync_role_definitions()
@@ -67,66 +68,76 @@ def version(verbose):
     print(Style.RESET_ALL)
 
 
-def load_examples_run(load_test_data):
-    print("Loading examples into {}".format(db))
+def load_examples_run(load_test_data, only_metadata=False, force=False):
+    if only_metadata:
+        print("Loading examples metadata")
+    else:
+        examples_db = utils.get_example_database()
+        print(f"Loading examples metadata and related data into {examples_db}")
 
-    data.load_css_templates()
+    examples.load_css_templates()
 
     print("Loading energy related dataset")
-    data.load_energy()
+    examples.load_energy(only_metadata, force)
 
     print("Loading [World Bank's Health Nutrition and Population Stats]")
-    data.load_world_bank_health_n_pop()
+    examples.load_world_bank_health_n_pop(only_metadata, force)
 
     print("Loading [Birth names]")
-    data.load_birth_names()
+    examples.load_birth_names(only_metadata, force)
 
     print("Loading [Unicode test data]")
-    data.load_unicode_test_data()
+    examples.load_unicode_test_data(only_metadata, force)
 
     if not load_test_data:
         print("Loading [Random time series data]")
-        data.load_random_time_series_data()
+        examples.load_random_time_series_data(only_metadata, force)
 
         print("Loading [Random long/lat data]")
-        data.load_long_lat_data()
+        examples.load_long_lat_data(only_metadata, force)
 
         print("Loading [Country Map data]")
-        data.load_country_map_data()
+        examples.load_country_map_data(only_metadata, force)
 
         print("Loading [Multiformat time series]")
-        data.load_multiformat_time_series()
+        examples.load_multiformat_time_series(only_metadata, force)
 
         print("Loading [Paris GeoJson]")
-        data.load_paris_iris_geojson()
+        examples.load_paris_iris_geojson(only_metadata, force)
 
         print("Loading [San Francisco population polygons]")
-        data.load_sf_population_polygons()
+        examples.load_sf_population_polygons(only_metadata, force)
 
         print("Loading [Flights data]")
-        data.load_flights()
+        examples.load_flights(only_metadata, force)
 
         print("Loading [BART lines]")
-        data.load_bart_lines()
+        examples.load_bart_lines(only_metadata, force)
 
         print("Loading [Multi Line]")
-        data.load_multi_line()
+        examples.load_multi_line(only_metadata)
 
         print("Loading [Misc Charts] dashboard")
-        data.load_misc_dashboard()
+        examples.load_misc_dashboard()
 
         print("Loading DECK.gl demo")
-        data.load_deck_dash()
+        examples.load_deck_dash()
 
     print("Loading [Tabbed dashboard]")
-    data.load_tabbed_dashboard()
+    examples.load_tabbed_dashboard(only_metadata)
 
 
 @app.cli.command()
 @click.option("--load-test-data", "-t", is_flag=True, help="Load additional test data")
-def load_examples(load_test_data):
+@click.option(
+    "--only-metadata", "-m", is_flag=True, help="Only load metadata, skip actual data"
+)
+@click.option(
+    "--force", "-f", is_flag=True, help="Force load data even if table already exists"
+)
+def load_examples(load_test_data, only_metadata=False, force=False):
     """Loads a set of Slices and Dashboards and a supporting dataset """
-    load_examples_run(load_test_data)
+    load_examples_run(load_test_data, only_metadata, force)
 
 
 @app.cli.command()
@@ -405,7 +416,7 @@ def load_test_users_run():
         for perm in security_manager.find_role("Gamma").permissions:
             security_manager.add_permission_role(gamma_sqllab_role, perm)
         utils.get_or_create_main_db()
-        db_perm = utils.get_main_database(security_manager.get_session).perm
+        db_perm = utils.get_main_database().perm
         security_manager.add_permission_view_menu("database_access", db_perm)
         db_pvm = security_manager.find_permission_view_menu(
             view_menu_name=db_perm, permission_name="database_access"

superset/config.py (4 changes: 4 additions & 0 deletions)

@@ -617,6 +617,10 @@ class CeleryConfig(object):
     "force_https_permanent": False,
 }
 
+# URI to database storing the example data, points to
+# SQLALCHEMY_DATABASE_URI by default if set to `None`
+SQLALCHEMY_EXAMPLES_URI = None
+
 try:
     if CONFIG_PATH_ENV_VAR in os.environ:
         # Explicitly import config module that is not in pythonpath; useful

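To make the new setting concrete, a minimal superset_config.py sketch; the
URIs below are placeholders for illustration, not values from this commit:

    # superset_config.py (hypothetical values)
    # Application metadata stays in the main database...
    SQLALCHEMY_DATABASE_URI = "postgresql://superset@localhost/superset"
    # ...while the example datasets land in a dedicated database. Leaving
    # SQLALCHEMY_EXAMPLES_URI as None falls back to SQLALCHEMY_DATABASE_URI.
    SQLALCHEMY_EXAMPLES_URI = "postgresql://superset@localhost/examples"
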
superset/connectors/connector_registry.py (15 changes: 3 additions & 12 deletions)

@@ -55,18 +55,9 @@ def get_datasource_by_name(
         cls, session, datasource_type, datasource_name, schema, database_name
     ):
         datasource_class = ConnectorRegistry.sources[datasource_type]
-        datasources = session.query(datasource_class).all()
-
-        # Filter datasoures that don't have database.
-        db_ds = [
-            d
-            for d in datasources
-            if d.database
-            and d.database.name == database_name
-            and d.name == datasource_name
-            and schema == schema
-        ]
-        return db_ds[0]
+        return datasource_class.get_datasource_by_name(
+            session, datasource_name, schema, database_name
+        )
 
     @classmethod
     def query_datasources_by_permissions(cls, session, database, permissions):

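For reference, a sketch of how the delegated lookup is invoked; the argument
values are hypothetical, while the signature matches the registry method above:

    from superset import db
    from superset.connectors.connector_registry import ConnectorRegistry

    # The registry now only dispatches on datasource_type; each connector
    # class owns its query (SQLA joins Database, Druid joins DruidCluster).
    tbl = ConnectorRegistry.get_datasource_by_name(
        db.session,
        "table",         # datasource_type, e.g. "table" (SQLA) or "druid"
        "birth_names",   # datasource_name (hypothetical)
        None,            # schema; the SQLA connector treats '' and None alike
        "examples",      # database_name (hypothetical)
    )
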
superset/connectors/druid/models.py (10 changes: 10 additions & 0 deletions)

@@ -732,6 +732,16 @@ def time_offset(granularity):
             return 6 * 24 * 3600 * 1000  # 6 days
         return 0
 
+    @classmethod
+    def get_datasource_by_name(cls, session, datasource_name, schema, database_name):
+        query = (
+            session.query(cls)
+            .join(DruidCluster)
+            .filter(cls.datasource_name == datasource_name)
+            .filter(DruidCluster.cluster_name == database_name)
+        )
+        return query.first()
+
     # uses https://en.wikipedia.org/wiki/ISO_8601
     # http://druid.io/docs/0.8.0/querying/granularities.html
     # TODO: pass origin from the UI

superset/connectors/sqla/models.py (15 changes: 15 additions & 0 deletions)

@@ -374,6 +374,21 @@ def datasource_name(self):
     def database_name(self):
         return self.database.name
 
+    @classmethod
+    def get_datasource_by_name(cls, session, datasource_name, schema, database_name):
+        schema = schema or None
+        query = (
+            session.query(cls)
+            .join(Database)
+            .filter(cls.table_name == datasource_name)
+            .filter(Database.database_name == database_name)
+        )
+        # Handling schema being '' or None, which is easier to handle
+        # in python than in the SQLA query in a multi-dialect way
+        for tbl in query.all():
+            if schema == (tbl.schema or None):
+                return tbl
+
     @property
     def link(self):
         name = escape(self.name)

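A note on the schema normalization above: `schema = schema or None` maps the
empty string to None before comparing against `tbl.schema or None`, so both
spellings of "no schema" resolve identically. A small sketch with hypothetical
table and database names:

    from superset import db
    from superset.connectors.sqla.models import SqlaTable

    # '' and None both normalize to "no schema", so these two calls find
    # the same table when the stored tbl.schema is NULL or ''.
    tbl_a = SqlaTable.get_datasource_by_name(db.session, "birth_names", "", "examples")
    tbl_b = SqlaTable.get_datasource_by_name(db.session, "birth_names", None, "examples")
    assert tbl_a is tbl_b
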
File renamed without changes.
superset/data/bart_lines.py → superset/examples/bart_lines.py (49 changes: 27 additions & 22 deletions)

@@ -21,37 +21,42 @@
 from sqlalchemy import String, Text
 
 from superset import db
-from superset.utils.core import get_or_create_main_db
-from .helpers import TBL, get_example_data
+from superset.utils.core import get_example_database
+from .helpers import get_example_data, TBL
 
 
-def load_bart_lines():
+def load_bart_lines(only_metadata=False, force=False):
     tbl_name = "bart_lines"
-    content = get_example_data("bart-lines.json.gz")
-    df = pd.read_json(content, encoding="latin-1")
-    df["path_json"] = df.path.map(json.dumps)
-    df["polyline"] = df.path.map(polyline.encode)
-    del df["path"]
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
 
-    df.to_sql(
-        tbl_name,
-        db.engine,
-        if_exists="replace",
-        chunksize=500,
-        dtype={
-            "color": String(255),
-            "name": String(255),
-            "polyline": Text,
-            "path_json": Text,
-        },
-        index=False,
-    )
+    if not only_metadata and (not table_exists or force):
+        content = get_example_data("bart-lines.json.gz")
+        df = pd.read_json(content, encoding="latin-1")
+        df["path_json"] = df.path.map(json.dumps)
+        df["polyline"] = df.path.map(polyline.encode)
+        del df["path"]
+
+        df.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "color": String(255),
+                "name": String(255),
+                "polyline": Text,
+                "path_json": Text,
+            },
+            index=False,
+        )
+
     print("Creating table {} reference".format(tbl_name))
     tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
     if not tbl:
         tbl = TBL(table_name=tbl_name)
     tbl.description = "BART lines"
-    tbl.database = get_or_create_main_db()
+    tbl.database = database
     db.session.merge(tbl)
     db.session.commit()
     tbl.fetch_metadata()

superset/data/birth_names.py → superset/examples/birth_names.py (60 changes: 33 additions & 27 deletions)

@@ -23,7 +23,7 @@
 
 from superset import db, security_manager
 from superset.connectors.sqla.models import SqlMetric, TableColumn
-from superset.utils.core import get_or_create_main_db
+from superset.utils.core import get_example_database
 from .helpers import (
     config,
     Dash,
@@ -36,33 +36,39 @@
 )
 
 
-def load_birth_names():
+def load_birth_names(only_metadata=False, force=False):
     """Loading birth name dataset from a zip file in the repo"""
-    data = get_example_data("birth_names.json.gz")
-    pdf = pd.read_json(data)
-    pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
-    pdf.to_sql(
-        "birth_names",
-        db.engine,
-        if_exists="replace",
-        chunksize=500,
-        dtype={
-            "ds": DateTime,
-            "gender": String(16),
-            "state": String(10),
-            "name": String(255),
-        },
-        index=False,
-    )
-    print("Done loading table!")
-    print("-" * 80)
+    # pylint: disable=too-many-locals
+    tbl_name = "birth_names"
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        pdf = pd.read_json(get_example_data("birth_names.json.gz"))
+        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "ds": DateTime,
+                "gender": String(16),
+                "state": String(10),
+                "name": String(255),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
 
-    print("Creating table [birth_names] reference")
-    obj = db.session.query(TBL).filter_by(table_name="birth_names").first()
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
     if not obj:
-        obj = TBL(table_name="birth_names")
+        print(f"Creating table [{tbl_name}] reference")
+        obj = TBL(table_name=tbl_name)
+        db.session.add(obj)
     obj.main_dttm_col = "ds"
-    obj.database = get_or_create_main_db()
+    obj.database = database
     obj.filter_select_enabled = True
 
     if not any(col.column_name == "num_california" for col in obj.columns):
@@ -79,7 +85,6 @@ def load_birth_names():
     col = str(column("num").compile(db.engine))
     obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))
 
-    db.session.merge(obj)
     db.session.commit()
     obj.fetch_metadata()
     tbl = obj
@@ -384,10 +389,12 @@ def load_birth_names():
         merge_slice(slc)
 
     print("Creating a dashboard")
-    dash = db.session.query(Dash).filter_by(dashboard_title="Births").first()
+    dash = db.session.query(Dash).filter_by(slug="births").first()
 
     if not dash:
         dash = Dash()
+        db.session.add(dash)
+    dash.published = True
     js = textwrap.dedent(
         # pylint: disable=line-too-long
         """\
@@ -649,5 +656,4 @@ def load_birth_names():
     dash.dashboard_title = "Births"
     dash.position_json = json.dumps(pos, indent=4)
     dash.slug = "births"
-    db.session.merge(dash)
     db.session.commit()

File renamed without changes.
File renamed without changes.
