Skip to content

Commit

Permalink
Refactor singular test to use intermediate tables:
Browse files Browse the repository at this point in the history
* These intermediate tables can also be used to populate reports to share
with DOF
  • Loading branch information
sf-dcp committed Sep 27, 2024
1 parent 9551bc5 commit f902c94
Show file tree
Hide file tree
Showing 9 changed files with 194 additions and 59 deletions.
22 changes: 22 additions & 0 deletions products/pluto/models/qaqc/intermediate/_int_models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
version: 2

models:
- name: qaqc_int__active_condo_bbl_unitsres_corrections
description: |
Contains condo bbls that are being corrected for res units in final PLUTO, using `pluto_input_research.csv` file
columns:
- name: bbl
- name: old_value

- name: qaqc_int__devdb_bbl_units_summary
description: |
This table provides total number of units per bbl, based on the most recent construction records in DevDB, excluding demolitions.
columns:
- name: bbl
- name: units_co
description: Total number of certificate of occupancy (CO) units
- name: classa_prop
description: Total number of Class A units (residential units)
- name: count_bins
description: Total number of BINs per BBL that had modifications in DevDB. Note that this may not represent all buildings on a lot
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{ config(
    materialized = 'table'
) }}

-- Condo BBLs whose `unitsres` correction in the PLUTO research file is still
-- active: the correction's `old_value` equals the coop_apts total currently
-- summed for that prime BBL in pluto_rpad_geo.
-- Output columns: bbl, old_value.

WITH historical_condo_unit_corrections AS (
    -- Every `unitsres` correction recorded for a BBL whose characters 7-8 are '75'
    SELECT
        bbl,
        old_value::numeric AS old_value
    FROM {{ source("recipe_sources", "pluto_input_research") }}
    WHERE
        field = 'unitsres'
        AND substring(bbl, 7, 2) = '75'
),

primebbl_condo_units AS (
    -- coop_apts summed per prime BBL, leaving out rows whose `tl` starts with '75'
    SELECT
        primebbl,
        sum(coop_apts) AS coop_apts
    FROM {{ ref('pluto_rpad_geo') }}
    WHERE tl NOT LIKE '75%'
    GROUP BY primebbl
)

-- Explicit column list (instead of l.*) keeps the model's output pinned to the
-- two columns declared for it, even if the first CTE gains columns later.
SELECT
    corrections.bbl,
    corrections.old_value
FROM historical_condo_unit_corrections AS corrections
INNER JOIN primebbl_condo_units AS dof
    ON corrections.bbl = dof.primebbl
WHERE corrections.old_value = dof.coop_apts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{{ config(
    materialized = 'table'
) }}

-- Unit totals per BBL from DevDB: for every (bbl, bin) pair keep the most
-- recently completed job, drop demolitions, then sum the units and count the
-- remaining BINs per BBL.

WITH completed_construction_records AS (
    SELECT
        bbl,
        bin,
        units_co,
        -- A fully completed job with no class A figure is counted as 0 units;
        -- a partially completed job keeps its NULL.
        CASE
            WHEN classa_prop IS NULL AND job_status = '5. Completed Construction' THEN 0
            ELSE classa_prop
        END AS classa_prop_modified,
        job_type,
        job_status,
        date_complete,
        -- NULLS LAST: PostgreSQL sorts NULLs first under DESC, which would rank
        -- a job without a completion date as the "most recent" one for its BIN.
        row_number() OVER (
            PARTITION BY bbl, bin
            ORDER BY date_complete DESC NULLS LAST
        ) AS order_num
    FROM {{ ref("stg__dcp_developments") }}
    WHERE job_status IN ('5. Completed Construction', '4. Partially Completed Construction')
),

most_recent_construction_by_bin AS (
    SELECT
        bbl,
        bin,
        units_co,
        classa_prop_modified AS classa_prop,
        job_type,
        job_status,
        date_complete
    FROM completed_construction_records
    WHERE order_num = 1 -- Selecting the most recent record per BIN
)

-- The demolition filter runs after the per-BIN pick, so a BIN whose latest job
-- is a demolition contributes nothing to the BBL totals.
SELECT
    bbl,
    sum(units_co) AS units_co,
    sum(classa_prop) AS classa_prop,
    count(*) AS count_bins
FROM most_recent_construction_by_bin
WHERE job_type <> 'Demolition' -- Exclude demolished buildings as property taxes may still be paid, aligning with the DOF data
GROUP BY bbl
27 changes: 27 additions & 0 deletions products/pluto/models/qaqc/reports/_reports_models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
version: 2

models:
- name: qaqc_reports__dof_incorrect_condo_res_units
description: |
This report is to be shared with DOF.
It contains DOF records that need to be corrected for res unit count at the source, supplied with DevDB unit counts
columns:
- name: primebbl
- name: units
description: DOF number of total units (including non-residential) per BBL
- name: coop_apts
description: DOF number of residential units per BBL
- name: units_co
description: DevDB number of units on Certificate of Occupancy
- name: classa_prop
description: DevDB number of class A units
- name: dof_matches_devdb_units
description: Flag to indicate whether DOF res units match DevDB res units (coop_apts == classa_prop)
- name: diff
description: Difference in residential units between DevDB and DOF (classa_prop - coop_apts)

- name: qaqc_reports__dof_pts_incorrect_condo_res_units
description: |
This report is to be shared with DOF.
It contains raw DOF PTS data filtered for condo records in `qaqc_reports__dof_incorrect_condo_res_units`
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Compares DOF unit totals with DevDB unit totals for prime BBLs that have an
-- active unitsres correction. The inner join keeps only BBLs that also appear
-- in the DevDB units summary.

WITH dof_condo_units AS (
    -- DOF coop_apts and units summed per prime BBL, restricted to BBLs with an
    -- active correction and leaving out rows whose `tl` starts with '75'
    SELECT
        geo.primebbl,
        sum(geo.coop_apts) AS coop_apts,
        sum(geo.units) AS units
    FROM {{ ref('pluto_rpad_geo') }} AS geo
    WHERE
        geo.tl NOT LIKE '75%'
        AND geo.primebbl IN (
            SELECT corrections.bbl
            FROM {{ ref('qaqc_int__active_condo_bbl_unitsres_corrections') }} AS corrections
        )
    GROUP BY geo.primebbl
)

SELECT
    dof.primebbl,
    dof.coop_apts,
    dof.units,
    devdb.units_co,
    devdb.classa_prop,
    devdb.count_bins,
    -- TRUE when the DOF coop_apts total equals the DevDB class A total
    dof.coop_apts = devdb.classa_prop AS dof_matches_devdb_units,
    -- DevDB minus DOF; positive means DevDB reports more units
    devdb.classa_prop - dof.coop_apts AS diff
FROM dof_condo_units AS dof
INNER JOIN {{ ref('qaqc_int__devdb_bbl_units_summary') }} AS devdb
    ON dof.primebbl = devdb.bbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Raw PTS rows for every lot that belongs to a prime BBL with an active
-- unitsres correction.

WITH filtered_dof_pts_propmaster AS (
    -- BBL strings (boro, tb and tl concatenated) of the property-master rows
    -- whose primebbl has an active correction
    SELECT propmaster.boro || propmaster.tb || propmaster.tl AS bbl
    FROM {{ ref('dof_pts_propmaster') }} AS propmaster
    WHERE propmaster.primebbl IN (
        SELECT corrections.bbl
        FROM {{ ref('qaqc_int__active_condo_bbl_unitsres_corrections') }} AS corrections
    )
)

-- All source columns are passed through unchanged; the match is made on the
-- concatenation of the PTS boro, block and lot columns.
SELECT pts.*
FROM {{ source("recipe_sources", "pluto_pts") }} AS pts
WHERE (pts.boro || pts.block || pts.lot) IN (
    SELECT matched.bbl
    FROM filtered_dof_pts_propmaster AS matched
)
8 changes: 8 additions & 0 deletions products/pluto/seeds/_seeds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2

seeds:
- name: ignored_bbls_for_unit_count_test
description: |
List of manually researched bbls that have correct count of residential units in DOF PTS data,
despite failing `assert_condo_bbl_unit_count_research_required` test. These records are ignored
during the test.
1 change: 1 addition & 0 deletions products/pluto/seeds/ignored_bbls_for_unit_count_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bbl,pluto_version
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
The remaining logic cross-checks these BBL "offenders" with the PLUTO corrections file and the Developments database (aka DevDB) to narrow down the records needing manual review.
''',
'next_steps': '''
Manually research failing records. If confirmed, update the corrections file with the correct unit counts and share a report with Amanda to send to DOF.
1. Manually research failing records.
2. If confirmed, update the corrections file with the correct total and res unit number;
not confirmed bbls need to be added to the `ignored_bbls_for_unit_count_test` seed file.
3. Re-run PLUTO build. If this check passes, share reports from `models/qaqc/reports` with Amanda to send them to DOF.
'''
}
)
}}

-- Find condo records where multiple unit BBLs have coop_apts > 1
-- Find records where multiple unit BBLs have coop_apts > 1
WITH condo_prime_bbls AS (
SELECT DISTINCT primebbl
SELECT primebbl
FROM {{ ref('pluto_rpad_geo') }}
WHERE coop_apts > 1
GROUP BY primebbl
Expand All @@ -35,64 +38,23 @@ primebbl_offenders AS (
GROUP BY primebbl
),

historical_unit_corrections AS (
SELECT
bbl,
old_value::numeric
FROM {{ source("recipe_sources", "pluto_input_research") }}
WHERE field = 'unitsres' AND substring(bbl, 7, 2) = '75'
),

current_unit_corrections AS (
SELECT l.* FROM historical_unit_corrections AS l
INNER JOIN primebbl_offenders AS r
ON l.bbl = r.primebbl AND l.old_value = r.coop_apts
),

-- Identify bbl offenders that are not actively corrected
uncorrected_primebbl_offenders AS (
SELECT *
FROM primebbl_offenders
WHERE primebbl NOT IN (SELECT bbl FROM current_unit_corrections)
),

-- Filter DevDB for uncorrected offenders
devdb_uncorrected_offenders AS (
SELECT
*,
row_number() OVER (PARTITION BY bbl, bin ORDER BY date_complete DESC) AS order_num,
CASE
WHEN classa_prop IS null AND job_status = '5. Completed Construction' THEN 0
ELSE classa_prop
END AS classa_prop_modified
FROM {{ ref("stg__dcp_developments") }}
WHERE
bbl IN
(SELECT DISTINCT primebbl FROM uncorrected_primebbl_offenders)
AND job_status IN ('5. Completed Construction', '4. Partially Completed Construction')
WHERE primebbl NOT IN (
SELECT bbl
FROM {{ ref('qaqc_int__active_condo_bbl_unitsres_corrections') }}
)
),

devdb_uncorrected_offender_grouped_by_latest_bin AS (
SELECT
bbl,
bin,
units_co,
classa_prop_modified AS classa_prop,
job_type,
job_status,
date_complete
FROM devdb_uncorrected_offenders
WHERE order_num = 1
),

devdb_uncorrected_offender_grouped_by_bbl AS (
SELECT
bbl,
sum(units_co) AS units_co,
sum(classa_prop) AS classa_prop,
count(*) AS count_bins
FROM devdb_uncorrected_offender_grouped_by_latest_bin
WHERE job_type <> 'Demolition' -- Exclude demolished buildings as property taxes may still be paid, aligning with the DOF data
GROUP BY bbl
not_ignored_primebbls AS (
SELECT *
FROM uncorrected_primebbl_offenders
WHERE primebbl NOT IN (
SELECT bbl::decimal::bigint::text
FROM {{ ref('ignored_bbls_for_unit_count_test') }}
)
),

-- Join DevDB with uncorrected offenders
Expand All @@ -104,13 +66,13 @@ offenders_joined_with_devdb AS (
r.count_bins,
l.coop_apts = r.classa_prop AS dof_matches_devdb_units,
r.classa_prop - l.coop_apts AS diff
FROM uncorrected_primebbl_offenders AS l
LEFT JOIN devdb_uncorrected_offender_grouped_by_bbl AS r
FROM not_ignored_primebbls AS l
LEFT JOIN {{ ref('qaqc_int__devdb_bbl_units_summary') }} AS r
ON l.primebbl = r.bbl
ORDER BY dof_matches_devdb_units
)

-- Final selection of records where the unit difference exceeds threshold
SELECT *
FROM offenders_joined_with_devdb
WHERE diff >= 50 OR diff <= -50 -- 50 is an arbitrary threshold
ORDER BY dof_matches_devdb_units

0 comments on commit f902c94

Please sign in to comment.