Skip to content

Commit

Permalink
Support a data_provenance entry in auspice v2 configs
Browse files Browse the repository at this point in the history
Adds support for a `data_provenance` field in the auspice v2 config and
exported auspice v2 JSONs through additions of schema definitions for
`data_provenance` and inclusion of an example provenance entry in the
Zika build's auspice config.

Fixes #691
  • Loading branch information
huddlej committed Mar 19, 2021
1 parent 684b90a commit 45ba089
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 1 deletion.
23 changes: 22 additions & 1 deletion augur/data/schema-auspice-config-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,27 @@
"type": "object",
"description": "UNUSED v1 syntax for defining vaccine choices",
"$comment": "This is unused in `augur export v2` which gets vaccine info via a node-data JSON file. It remains in the schema so that v1 config files can be used by `augur export v2`"
},
"data_provenance": {
"description": "Specify provenance of data included in this analysis",
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"description": "An individual data source",
"additionalProperties": false,
"required": ["name"],
"properties": {
"name": {
"description": "Name of the data source",
"type": "string"
},
"url": {
"description": "URL to use in link to data source",
"type": "string"
}
}
}
}
}
}
}
21 changes: 21 additions & 0 deletions augur/data/schema-export-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,27 @@
"$comment": "Frequencies could be specified here if desired",
"$comment": "If not specified, and frequencies are asked for in #/panels, then Auspice will attempt to fetch a separate JSON",
"$comment": "cc John / Trevor"
},
"data_provenance": {
"description": "Specify provenance of data included in this analysis",
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"description": "An individual data source",
"additionalProperties": false,
"required": ["name"],
"properties": {
"name": {
"description": "Name of the data source",
"type": "string"
},
"url": {
"description": "URL to use in link to data source",
"type": "string"
}
}
}
}
}
},
Expand Down
22 changes: 22 additions & 0 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,27 @@ def set_panels(data_json, config, cmd_line_panels):
data_json['meta']["panels"] = panels


def set_data_provenance(data_json, config):
    """Transfer the ``data_provenance`` entry of a config into the dataset metadata.

    When ``config`` has a ``data_provenance`` key, its value is stored under
    ``data_json["meta"]["data_provenance"]``. The value is assigned as-is
    (the same object, not a copy). When the key is absent, ``data_json`` is
    left untouched.

    Parameters
    ----------
    data_json : dict
        auspice JSON to update in place; it is indexed with ``"meta"``, so
        that key must already be present
    config : dict
        config JSON that may contain a ``data_provenance`` key

    Examples
    --------
    >>> data_json = {"meta": {}}
    >>> config = {"data_provenance": [{"name": "GISAID"}, {"name": "INSDC"}]}
    >>> set_data_provenance(data_json, config)
    >>> data_json["meta"]["data_provenance"][0]["name"]
    'GISAID'
    >>> unchanged = {"meta": {}}
    >>> set_data_provenance(unchanged, {})
    >>> unchanged
    {'meta': {}}
    """
    key = "data_provenance"

    # Nothing to record when the config does not declare any provenance.
    if key not in config:
        return

    data_json["meta"][key] = config[key]


def counter_to_disambiguation_suffix(count):
"""Given a numeric count of author papers, return a distinct alphabetical
disambiguation suffix.
Expand Down Expand Up @@ -914,6 +935,7 @@ def run_v2(args):
set_node_attrs_on_tree(data_json, node_attrs)
set_geo_resolutions(data_json, config, args.geo_resolutions, read_lat_longs(args.lat_longs), node_attrs)
set_panels(data_json, config, args.panels)
set_data_provenance(data_json, config)

# Write outputs - the (unified) dataset JSON intended for auspice & perhaps the ref root-sequence JSON
indent = {"indent": None} if args.minify_json else {}
Expand Down
9 changes: 9 additions & 0 deletions tests/builds/zika/config/auspice_config_v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,14 @@
"map",
"entropy",
"frequencies"
],
"data_provenance": [
{
"name": "GISAID"
},
{
"name": "COG UK",
"url": "https://www.cogconsortium.uk"
}
]
}

0 comments on commit 45ba089

Please sign in to comment.