From 297dbac429f11aee851e9acd8e77efbd39770264 Mon Sep 17 00:00:00 2001 From: "Roberts, Simon" Date: Tue, 21 May 2024 09:05:56 +1000 Subject: [PATCH 1/6] Add geojson generators --- code/adhoc_tools.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 1961c6a2ea..75578f345c 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -445,6 +445,22 @@ def dump_status_tech_upgrade(): pprint.pprint(tallies) +def get_all_geojson_files(show_progress: bool = True): + """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory""" + filenames = glob.glob("results/**/*.geojson") + for n, filename in enumerate(filenames): + if show_progress and n % 100 == 0: + utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) + yield filename, utils.read_json_file(filename) + + +def get_all_features(show_progress: bool = True): + """A generator that returns (filename, geojson_data, feature) for every Feature in every GeoJSON file.""" + for filename, geojson_data in get_all_geojson_files(show_progress): + for feature in geojson_data["features"]: + yield filename, geojson_data, feature + + if __name__ == "__main__": LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper() logging.basicConfig(level=LOGLEVEL, format="%(asctime)s %(levelname)s %(threadName)s %(message)s") From 2188aeb7e18171dedad929e9f6d99b7356da0982 Mon Sep 17 00:00:00 2001 From: "Roberts, Simon" Date: Tue, 21 May 2024 09:12:34 +1000 Subject: [PATCH 2/6] Move print_breakdowns() back to update_breakdown.py --- code/adhoc_tools.py | 8 -------- code/update_breakdown.py | 13 ++++++++++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 75578f345c..66597e9338 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -415,14 +415,6 @@ def update_breakdown(): return breakdowns -def print_breakdowns(breakdowns): - """Dump the breakdowns to the console as tables.""" - for key in {"tech", "upgrade"}: - rows = [{"date": run_date} | breakdowns[run_date][key] for run_date in sorted(breakdowns)] - print() - print(tabulate(rows, headers="keys", tablefmt="github")) - - def dump_status_tech_upgrade(): """Dump the tech and upgrade breakdowns to the console.""" tallies = {} # status -> tech -> upgrade:count diff --git a/code/update_breakdown.py b/code/update_breakdown.py index 8dbd2e00f6..c93f0215ef 100755 --- a/code/update_breakdown.py +++ b/code/update_breakdown.py @@ -3,7 +3,18 @@ import logging -from adhoc_tools import generate_state_breakdown, print_breakdowns, update_breakdown +from tabulate import tabulate + +from adhoc_tools import generate_state_breakdown, update_breakdown + + +def print_breakdowns(breakdowns): + """Dump the breakdowns to the console as tables.""" + for key in {"tech", "upgrade"}: + rows = [{"date": run_date} | breakdowns[run_date][key] for run_date in sorted(breakdowns)] + print() + print(tabulate(rows, headers="keys", tablefmt="github")) + if __name__ == "__main__": # pragma: no cover logging.basicConfig(level=logging.INFO) From df97c8694093eca8ed12173f048aa7e6c61f77b9 Mon Sep 17 00:00:00 2001 From: "Roberts, Simon" Date: Tue, 21 May 2024 09:50:31 +1000 Subject: [PATCH 3/6] Add support for updating the GeoJSON file. Convert most to use this. --- code/adhoc_tools.py | 121 +++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 74 deletions(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 66597e9338..a58456ca59 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import copy import csv import glob import logging @@ -240,17 +241,12 @@ def generate_all_suburbs_nbn_tallies(): """Create a file containing a tally of all suburbs by property (tech, upgrade, etc)""" exclude_properties = {"name", "locID", "gnaf_pid"} tallies = {} # property-name -> Counter() - filenames = glob.glob("results/**/*.geojson") - for n, file in enumerate(filenames): - if n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - - for feature in utils.read_json_file(file)["features"]: - for prop, value in feature["properties"].items(): - if prop not in exclude_properties: - if prop not in tallies: - tallies[prop] = Counter() - tallies[prop][value] += 1 + for _, _, feature in get_all_features(): + for prop, value in feature["properties"].items(): + if prop not in exclude_properties: + if prop not in tallies: + tallies[prop] = Counter() + tallies[prop][value] += 1 def _parse_quarter(item: tuple[str, int]): """Parse a quarter string into a datetime object. If NA, return epoch.""" @@ -309,20 +305,9 @@ def generate_state_breakdown(): def fix_fw_tech_type(): """Fix any tech-type 'fw' should be 'wireless'.""" - filenames = glob.glob("results/**/*.geojson") - for n, file in enumerate(filenames): - if n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - - found = 0 - geojson = utils.read_json_file(file) - for feature in geojson["features"]: - if feature["properties"]["tech"] == "FW": - feature["properties"]["tech"] = "WIRELESS" - found += 1 - if found: - utils.write_json_file(file, geojson, indent=1) - logging.info("Fixed %d in %s", found, file) + for _, _, feature in get_all_features(rewrite_geojson=True): + if feature["properties"]["tech"] == "FW": + feature["properties"]["tech"] = "WIRELESS" def fix_fw_tech_type_breakdowns(): @@ -355,40 +340,23 @@ def fix_tech_breakdown(tech): def check_tech_change_status_upgrade(): """Emit tally on the upgrade field for all locations with tech_change_status.""" tallies = {} - filenames = glob.glob("results/**/*.geojson") - for n, file in enumerate(filenames): - if n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - geojson = utils.read_json_file(file) - for feature in geojson["features"]: - tech_change = feature["properties"].get("tech_change_status") - if tech_change: - if tech_change not in tallies: - tallies[tech_change] = Counter() - tallies[tech_change][feature["properties"].get("upgrade")] += 1 - - print() + for _, _, feature in get_all_features(): + tech_change = feature["properties"].get("tech_change_status") + if tech_change: + if tech_change not in tallies: + tallies[tech_change] = Counter() + tallies[tech_change][feature["properties"].get("upgrade")] += 1 + pprint.pprint(tallies) def fix_ct_upgrades(): """Update all locations with upgrade=XXX_CT and tech=OTHER to be tech=XXX and upgrade=OTHER""" - filenames = glob.glob("results/**/*.geojson") - for n, file in enumerate(filenames): - if n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - - found = 0 - geojson = utils.read_json_file(file) - for feature in geojson["features"]: - upgrade_val = feature["properties"]["upgrade"] - if upgrade_val in main.CT_UPGRADE_MAP: - feature["properties"]["upgrade"] = feature["properties"]["tech"] - feature["properties"]["tech"] = main.CT_UPGRADE_MAP[upgrade_val] - found += 1 - if found: - utils.write_json_file(file, geojson, indent=1) - logging.info("Fixed %d in %s", found, file) + for _, _, feature in get_all_features(rewrite_geojson=True): + upgrade_val = feature["properties"]["upgrade"] + if upgrade_val in main.CT_UPGRADE_MAP: + feature["properties"]["upgrade"] = feature["properties"]["tech"] + feature["properties"]["tech"] = main.CT_UPGRADE_MAP[upgrade_val] # update breakdown.json and breakdown-suburbs.json update_breakdown() @@ -418,37 +386,42 @@ def update_breakdown(): def dump_status_tech_upgrade(): """Dump the tech and upgrade breakdowns to the console.""" tallies = {} # status -> tech -> upgrade:count - filenames = glob.glob("results/**/*.geojson") - for n, file in enumerate(filenames): - if n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - - geojson = utils.read_json_file(file) - for feature in geojson["features"]: - status = feature["properties"].get("tech_change_status", "?") - tech = feature["properties"]["tech"] - upgrade = feature["properties"]["upgrade"] - if status not in tallies: - tallies[status] = {} - if tech not in tallies[status]: - tallies[status][tech] = {} - tallies[status][tech][upgrade] = tallies[status][tech].get(upgrade, 0) + 1 + for _, _, feature in get_all_features(): + status = feature["properties"].get("tech_change_status", "?") + tech = feature["properties"]["tech"] + upgrade = feature["properties"]["upgrade"] + if status not in tallies: + tallies[status] = {} + if tech not in tallies[status]: + tallies[status][tech] = {} + tallies[status][tech][upgrade] = tallies[status][tech].get(upgrade, 0) + 1 pprint.pprint(tallies) -def get_all_geojson_files(show_progress: bool = True): +def get_all_geojson_files(show_progress: bool = True, rewrite_geojson: bool = False): """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory""" - filenames = glob.glob("results/**/*.geojson") + filenames = glob.glob("results/V**/s*.geojson") # FIXME for n, filename in enumerate(filenames): if show_progress and n % 100 == 0: utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - yield filename, utils.read_json_file(filename) + geojson_data = utils.read_json_file(filename) + if rewrite_geojson: + # take a copy of the GeoJSON, and if it is modified, write it back to the original file + geojson_data_copy = copy.deepcopy(geojson_data) + yield filename, geojson_data + if geojson_data != geojson_data_copy: + utils.write_json_file(filename, geojson_data, indent=1) + else: + yield filename, geojson_data + + # final 100% output + utils.print_progress_bar(1, 1, prefix="Progress:", suffix="Complete", length=50) -def get_all_features(show_progress: bool = True): +def get_all_features(show_progress: bool = True, rewrite_geojson: bool = False): """A generator that returns (filename, geojson_data, feature) for every Feature in every GeoJSON file.""" - for filename, geojson_data in get_all_geojson_files(show_progress): + for filename, geojson_data in get_all_geojson_files(show_progress, rewrite_geojson): for feature in geojson_data["features"]: yield filename, geojson_data, feature From cbe2f198e9f5156202dac52fe2abd5ed91bf8a21 Mon Sep 17 00:00:00 2001 From: "Roberts, Simon" Date: Tue, 21 May 2024 09:53:13 +1000 Subject: [PATCH 4/6] Move generators to utils.py --- code/adhoc_tools.py | 30 ++---------------------------- code/utils.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index a58456ca59..02cd2ef1a9 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import copy import csv import glob import logging @@ -21,6 +20,8 @@ from bs4 import BeautifulSoup from tabulate import tabulate +from utils import get_all_features + NBN_UPGRADE_DATES_URL = ( "https://www.nbnco.com.au/corporate-information/media-centre/media-statements/nbnco-announces-suburbs-and" "-towns-where-an-additional-ninty-thousand-homes-and-businesses-will-become-eligible-for-fibre-upgrades" @@ -399,33 +400,6 @@ def dump_status_tech_upgrade(): pprint.pprint(tallies) -def get_all_geojson_files(show_progress: bool = True, rewrite_geojson: bool = False): - """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory""" - filenames = glob.glob("results/V**/s*.geojson") # FIXME - for n, filename in enumerate(filenames): - if show_progress and n % 100 == 0: - utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) - geojson_data = utils.read_json_file(filename) - if rewrite_geojson: - # take a copy of the GeoJSON, and if it is modified, write it back to the original file - geojson_data_copy = copy.deepcopy(geojson_data) - yield filename, geojson_data - if geojson_data != geojson_data_copy: - utils.write_json_file(filename, geojson_data, indent=1) - else: - yield filename, geojson_data - - # final 100% output - utils.print_progress_bar(1, 1, prefix="Progress:", suffix="Complete", length=50) - - -def get_all_features(show_progress: bool = True, rewrite_geojson: bool = False): - """A generator that returns (filename, geojson_data, feature) for every Feature in every GeoJSON file.""" - for filename, geojson_data in get_all_geojson_files(show_progress, rewrite_geojson): - for feature in geojson_data["features"]: - yield filename, geojson_data, feature - - if __name__ == "__main__": LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper() logging.basicConfig(level=LOGLEVEL, format="%(asctime)s %(levelname)s %(threadName)s %(message)s") diff --git a/code/utils.py b/code/utils.py index 2e78225cac..e57d95440c 100644 --- a/code/utils.py +++ b/code/utils.py @@ -1,3 +1,5 @@ +import copy +import glob import json import os @@ -37,3 +39,30 @@ def read_json_file(filename: str, empty_if_missing=False) -> dict: return {} with open(filename, encoding="utf-8") as file: return json.load(file) + + +def get_all_geojson_files(show_progress: bool = True, rewrite_geojson: bool = False): + """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory""" + filenames = glob.glob("results/V**/s*.geojson") # FIXME + for n, filename in enumerate(filenames): + if show_progress and n % 100 == 0: + print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) + geojson_data = read_json_file(filename) + if rewrite_geojson: + # take a copy of the GeoJSON, and if it is modified, write it back to the original file + geojson_data_copy = copy.deepcopy(geojson_data) + yield filename, geojson_data + if geojson_data != geojson_data_copy: + write_json_file(filename, geojson_data, indent=1) + else: + yield filename, geojson_data + + # final 100% output + print_progress_bar(1, 1, prefix="Progress:", suffix="Complete", length=50) + + +def get_all_features(show_progress: bool = True, rewrite_geojson: bool = False): + """A generator that returns (filename, geojson_data, feature) for every Feature in every GeoJSON file.""" + for filename, geojson_data in get_all_geojson_files(show_progress, rewrite_geojson): + for feature in geojson_data["features"]: + yield filename, geojson_data, feature From d5306b02b5f667007e0e4892f9a28724215722cf Mon Sep 17 00:00:00 2001 From: lyricnz Date: Mon, 20 May 2024 23:55:56 +0000 Subject: [PATCH 5/6] [MegaLinter] Apply linters fixes --- code/adhoc_tools.py | 1 - code/update_breakdown.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 02cd2ef1a9..505e827410 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -19,7 +19,6 @@ import utils from bs4 import BeautifulSoup from tabulate import tabulate - from utils import get_all_features NBN_UPGRADE_DATES_URL = ( diff --git a/code/update_breakdown.py b/code/update_breakdown.py index c93f0215ef..6283be94d3 100755 --- a/code/update_breakdown.py +++ b/code/update_breakdown.py @@ -3,9 +3,8 @@ import logging -from tabulate import tabulate - from adhoc_tools import generate_state_breakdown, update_breakdown +from tabulate import tabulate def print_breakdowns(breakdowns): From 86e21385e0becf88ae99850e2b2db5a5221a4b1b Mon Sep 17 00:00:00 2001 From: "Roberts, Simon" Date: Tue, 21 May 2024 10:07:48 +1000 Subject: [PATCH 6/6] Whoops. Fix debug TODO --- code/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/utils.py b/code/utils.py index e57d95440c..80c7f83891 100644 --- a/code/utils.py +++ b/code/utils.py @@ -43,7 +43,7 @@ def read_json_file(filename: str, empty_if_missing=False) -> dict: def get_all_geojson_files(show_progress: bool = True, rewrite_geojson: bool = False): """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory""" - filenames = glob.glob("results/V**/s*.geojson") # FIXME + filenames = glob.glob("results/**/*.geojson") for n, filename in enumerate(filenames): if show_progress and n % 100 == 0: print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)