Skip to content

Commit

Permalink
Merge pull request #368 from lyricnz/feature/geojson-processor
Browse files Browse the repository at this point in the history
Implement a reusable function for processing GeoJSON files
  • Loading branch information
LukePrior authored May 21, 2024
2 parents e58f095 + 86e2138 commit ceb97c1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 78 deletions.
108 changes: 31 additions & 77 deletions code/adhoc_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import utils
from bs4 import BeautifulSoup
from tabulate import tabulate
from utils import get_all_features

NBN_UPGRADE_DATES_URL = (
"https://www.nbnco.com.au/corporate-information/media-centre/media-statements/nbnco-announces-suburbs-and"
Expand Down Expand Up @@ -240,17 +241,12 @@ def generate_all_suburbs_nbn_tallies():
"""Create a file containing a tally of all suburbs by property (tech, upgrade, etc)"""
exclude_properties = {"name", "locID", "gnaf_pid"}
tallies = {} # property-name -> Counter()
filenames = glob.glob("results/**/*.geojson")
for n, file in enumerate(filenames):
if n % 100 == 0:
utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)

for feature in utils.read_json_file(file)["features"]:
for prop, value in feature["properties"].items():
if prop not in exclude_properties:
if prop not in tallies:
tallies[prop] = Counter()
tallies[prop][value] += 1
for _, _, feature in get_all_features():
for prop, value in feature["properties"].items():
if prop not in exclude_properties:
if prop not in tallies:
tallies[prop] = Counter()
tallies[prop][value] += 1

def _parse_quarter(item: tuple[str, int]):
"""Parse a quarter string into a datetime object. If NA, return epoch."""
Expand Down Expand Up @@ -309,20 +305,9 @@ def generate_state_breakdown():

def fix_fw_tech_type():
"""Fix any tech-type 'fw' should be 'wireless'."""
filenames = glob.glob("results/**/*.geojson")
for n, file in enumerate(filenames):
if n % 100 == 0:
utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)

found = 0
geojson = utils.read_json_file(file)
for feature in geojson["features"]:
if feature["properties"]["tech"] == "FW":
feature["properties"]["tech"] = "WIRELESS"
found += 1
if found:
utils.write_json_file(file, geojson, indent=1)
logging.info("Fixed %d in %s", found, file)
for _, _, feature in get_all_features(rewrite_geojson=True):
if feature["properties"]["tech"] == "FW":
feature["properties"]["tech"] = "WIRELESS"


def fix_fw_tech_type_breakdowns():
Expand Down Expand Up @@ -355,40 +340,23 @@ def fix_tech_breakdown(tech):
def check_tech_change_status_upgrade():
"""Emit tally on the upgrade field for all locations with tech_change_status."""
tallies = {}
filenames = glob.glob("results/**/*.geojson")
for n, file in enumerate(filenames):
if n % 100 == 0:
utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)
geojson = utils.read_json_file(file)
for feature in geojson["features"]:
tech_change = feature["properties"].get("tech_change_status")
if tech_change:
if tech_change not in tallies:
tallies[tech_change] = Counter()
tallies[tech_change][feature["properties"].get("upgrade")] += 1

print()
for _, _, feature in get_all_features():
tech_change = feature["properties"].get("tech_change_status")
if tech_change:
if tech_change not in tallies:
tallies[tech_change] = Counter()
tallies[tech_change][feature["properties"].get("upgrade")] += 1

pprint.pprint(tallies)


def fix_ct_upgrades():
"""Update all locations with upgrade=XXX_CT and tech=OTHER to be tech=XXX and upgrade=OTHER"""
filenames = glob.glob("results/**/*.geojson")
for n, file in enumerate(filenames):
if n % 100 == 0:
utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)

found = 0
geojson = utils.read_json_file(file)
for feature in geojson["features"]:
upgrade_val = feature["properties"]["upgrade"]
if upgrade_val in main.CT_UPGRADE_MAP:
feature["properties"]["upgrade"] = feature["properties"]["tech"]
feature["properties"]["tech"] = main.CT_UPGRADE_MAP[upgrade_val]
found += 1
if found:
utils.write_json_file(file, geojson, indent=1)
logging.info("Fixed %d in %s", found, file)
for _, _, feature in get_all_features(rewrite_geojson=True):
upgrade_val = feature["properties"]["upgrade"]
if upgrade_val in main.CT_UPGRADE_MAP:
feature["properties"]["upgrade"] = feature["properties"]["tech"]
feature["properties"]["tech"] = main.CT_UPGRADE_MAP[upgrade_val]

# update breakdown.json and breakdown-suburbs.json
update_breakdown()
Expand All @@ -415,32 +383,18 @@ def update_breakdown():
return breakdowns


def print_breakdowns(breakdowns):
    """Dump the breakdowns to the console as tables.

    Prints one GitHub-style table per category ("tech" first, then "upgrade").
    Each table has one row per key of ``breakdowns``, in sorted key order; the
    key is shown in a "date" column and the category's mapping supplies the
    remaining columns.

    Args:
        breakdowns: mapping of run date -> mapping containing "tech" and
            "upgrade" entries, each of which is itself a mapping that is
            merged into the row.

    Raises:
        KeyError: if a run date has no "tech" or "upgrade" entry.
    """
    # A tuple (rather than a set literal) keeps the table order identical on
    # every run; iteration order of a set of strings is not stable across
    # interpreter invocations because of hash randomisation.
    for key in ("tech", "upgrade"):
        rows = [{"date": run_date} | breakdowns[run_date][key] for run_date in sorted(breakdowns)]
        print()
        print(tabulate(rows, headers="keys", tablefmt="github"))


def dump_status_tech_upgrade():
"""Dump the tech and upgrade breakdowns to the console."""
tallies = {} # status -> tech -> upgrade:count
filenames = glob.glob("results/**/*.geojson")
for n, file in enumerate(filenames):
if n % 100 == 0:
utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)

geojson = utils.read_json_file(file)
for feature in geojson["features"]:
status = feature["properties"].get("tech_change_status", "?")
tech = feature["properties"]["tech"]
upgrade = feature["properties"]["upgrade"]
if status not in tallies:
tallies[status] = {}
if tech not in tallies[status]:
tallies[status][tech] = {}
tallies[status][tech][upgrade] = tallies[status][tech].get(upgrade, 0) + 1
for _, _, feature in get_all_features():
status = feature["properties"].get("tech_change_status", "?")
tech = feature["properties"]["tech"]
upgrade = feature["properties"]["upgrade"]
if status not in tallies:
tallies[status] = {}
if tech not in tallies[status]:
tallies[status][tech] = {}
tallies[status][tech][upgrade] = tallies[status][tech].get(upgrade, 0) + 1

pprint.pprint(tallies)

Expand Down
12 changes: 11 additions & 1 deletion code/update_breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@

import logging

from adhoc_tools import generate_state_breakdown, print_breakdowns, update_breakdown
from adhoc_tools import generate_state_breakdown, update_breakdown
from tabulate import tabulate


def print_breakdowns(breakdowns):
    """Dump the breakdowns to the console as tables.

    Prints one GitHub-style table per category ("tech" first, then "upgrade").
    Each table has one row per key of ``breakdowns``, in sorted key order; the
    key is shown in a "date" column and the category's mapping supplies the
    remaining columns.

    Args:
        breakdowns: mapping of run date -> mapping containing "tech" and
            "upgrade" entries, each of which is itself a mapping that is
            merged into the row.

    Raises:
        KeyError: if a run date has no "tech" or "upgrade" entry.
    """
    # A tuple (rather than a set literal) keeps the table order identical on
    # every run; iteration order of a set of strings is not stable across
    # interpreter invocations because of hash randomisation.
    for key in ("tech", "upgrade"):
        rows = [{"date": run_date} | breakdowns[run_date][key] for run_date in sorted(breakdowns)]
        print()
        print(tabulate(rows, headers="keys", tablefmt="github"))


if __name__ == "__main__": # pragma: no cover
logging.basicConfig(level=logging.INFO)
Expand Down
29 changes: 29 additions & 0 deletions code/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import copy
import glob
import json
import os

Expand Down Expand Up @@ -37,3 +39,30 @@ def read_json_file(filename: str, empty_if_missing=False) -> dict:
return {}
with open(filename, encoding="utf-8") as file:
return json.load(file)


def get_all_geojson_files(show_progress: bool = True, rewrite_geojson: bool = False):
    """A generator that returns (filename, geojson_data) for each GeoJSON file in the results directory.

    Args:
        show_progress: when True, print a progress bar every 100 files and a
            final 100% bar once all files have been yielded. When False, no
            progress output is produced at all.
        rewrite_geojson: when True, changes the consumer makes to the yielded
            ``geojson_data`` are detected and written back to ``filename``.

    Yields:
        ``(filename, geojson_data)`` for every path matched by
        ``results/**/*.geojson``.
    """
    filenames = glob.glob("results/**/*.geojson")
    for n, filename in enumerate(filenames):
        if show_progress and n % 100 == 0:
            print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50)
        geojson_data = read_json_file(filename)
        if rewrite_geojson:
            # take a copy of the GeoJSON, and if it is modified, write it back to the original file
            geojson_data_copy = copy.deepcopy(geojson_data)
            yield filename, geojson_data
            # NOTE: this comparison only runs once the consumer requests the
            # next item, so a consumer that stops iterating early will not
            # have its changes to the current file saved.
            if geojson_data != geojson_data_copy:
                write_json_file(filename, geojson_data, indent=1)
        else:
            yield filename, geojson_data

    if show_progress:
        # final 100% output (skipped when the caller asked for no progress output)
        print_progress_bar(1, 1, prefix="Progress:", suffix="Complete", length=50)


def get_all_features(show_progress: bool = True, rewrite_geojson: bool = False):
    """Generate (filename, geojson_data, feature) for each Feature of each GeoJSON file.

    Both arguments are passed straight through to ``get_all_geojson_files``;
    every entry of a file's ``"features"`` list is yielded together with the
    file's name and its full GeoJSON document.
    """
    geojson_files = get_all_geojson_files(show_progress, rewrite_geojson)
    for path, document in geojson_files:
        features = document["features"]
        for item in features:
            yield path, document, item

0 comments on commit ceb97c1

Please sign in to comment.