-
Notifications
You must be signed in to change notification settings - Fork 14
/
prep community needs index.r
195 lines (148 loc) · 8.17 KB
/
prep community needs index.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
##
## Prepare the Community Needs Index, showing the Index and its domains for all Wards, along with which are the left-behind areas
##
## More info: https://ocsi.uk/2020/01/15/community-needs-index-your-questions-answered/
## The data isn't public but OCSI can provide it to social purpose organisations - email them for info
##
library(tidyverse)
library(janitor)
library(readxl)
library(Hmisc)
library(httr)
library(sf)
source("functions.r")
data.dir.cmi = "P:/Operations/Innovation & Insight/Insight/Data science/Data/Community Needs Index" # use our local copy of the data
##
## Load data
##
index_scores = read_excel(file.path(data.dir.cmi, "Phase 2 Community Needs Index Score and Rank.xlsx"))
index_domains = read_excel(file.path(data.dir.cmi, "Community Needs Index domain scores.xlsx"))
left_behind = read_excel(file.path(data.dir.cmi, "Phase 2 Left Behind Areas.xlsx"))
# get ward to Local Authority lookup
ward_lad = read_csv("https://opendata.arcgis.com/datasets/500d4283cbe54e3fa7f358399ba3783e_0.csv") %>%
select(WD17CD, LAD17CD)
# lookup of Local Authority codes from 2017 to 2019
lad_17_19 = read_csv("data/LAD 2017 to LAD 2019 codes.csv")
##
## Merge index scores, domains and left-behind areas into one dataframe
##
left_behind$left_behind = 1 # flag which wards are 'left behind'
community_needs = index_domains %>%
left_join(index_scores %>% select(`Ward Code`, `Community Need Score Phase 2 Score`, `Community Needs Index Phase 2 Rank`),
by = "Ward Code") %>%
left_join(left_behind %>% select(`Ward Code`, left_behind),
by = "Ward Code") %>%
mutate(left_behind = replace_na(left_behind, 0))
##
## Reverse the ranks so that 1 = best-scoring wards (so it matches the rest of our process where highest ranks are most vulnerable)
##
n_wards = nrow(community_needs) + 1
community_needs = community_needs %>%
mutate(`Civic Assets rank` = n_wards - `Civic Assets rank`,
`Engaged community rank` = n_wards - `Engaged community rank`,
`Connectedness rank` = n_wards - `Connectedness rank`,
`Community Needs Index Phase 2 Rank` = n_wards - `Community Needs Index Phase 2 Rank`)
##
## Calculate risk quantiles - not using this approach anymore
##
# community_needs = community_needs %>%
# mutate(civic_assets_q = as.integer(cut2(`Civic Assets score`, g = 5)),
# connectedness_q = as.integer(cut2(`Connectedness score`, g = 5)),
# engaged_q = as.integer(cut2(`Engaged community score`, g = 5)),
# need_index_q = as.integer(cut2(`Community Need Score Phase 2 Score`, g = 5)))
##
## Save subset of data for wards - we're not using the Connectedness domain, so drop that
##
community_needs_ward = community_needs %>%
select(Code = `Ward Code`, `Left behind area?` = left_behind, `Civic Assets rank`, `Engaged community rank`, `Connectedness rank`, `Community Needs Index rank` = `Community Needs Index Phase 2 Rank`)
write_csv(community_needs_ward, "data/community-needs-ward.csv")
##
## Aggregate wards into Local Authorities
## - according to OCSI, we can only look for higher concentrations of wards that score highly on the Community Needs Index within a larger geographical area
##
# download ward-level population estimates from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/wardlevelmidyearpopulationestimatesexperimental
# GET("https://www.ons.gov.uk/file?uri=%2fpeoplepopulationandcommunity%2fpopulationandmigration%2fpopulationestimates%2fdatasets%2fwardlevelmidyearpopulationestimatesexperimental%2fmid2018sape21dt8a/sape21dt8amid2018ward20182019lasyoaestunformatted.zip",
# write_disk(tf <- tempfile(fileext = ".zip")))
#
# unzip(tf, exdir = "data/population")
# unlink(tf); rm(tf)
#
# pop_ward = read_excel("data/population/SAPE21DT8a-mid-2018-ward-2018-on-2019-LA-syoa-estimates-unformatted.xlsx", sheet = "Mid-2018 Persons", skip = 4)
# community_needs %>%
# select(`Ward Code`, `Community Needs Index Phase 2 Rank`) %>%
# left_join(pop_ward, by = c("Ward Code" = "Ward Code 1")) %>%
# filter(is.na(`All Ages`))
#
# ward_all_codes = read_csv("https://github.com/drkane/geo-lookups/raw/master/ward_all_codes.csv")
# aggregate overall CMI score
community_needs_lad_cmi = community_needs %>%
left_join(ward_lad, by = c("Ward Code" = "WD17CD")) %>%
left_join(lad_17_19, by = "LAD17CD") %>%
mutate(Quintile = calc_risk_quantiles(`Community Needs Index Phase 2 Rank`, quants = 5)) %>%
# label wards by whether they're in worst-score category then summarise by this label
mutate(Worst = ifelse(Quintile == 5, "Worst", "Other")) %>%
tabyl(LAD19CD, Worst) %>%
# calculate proportion of worst-score areas
mutate(`Proportion of wards with greatest community needs` = Worst / (Worst + Other)) %>%
select(-Other, -Worst) %>%
na.omit()
# aggregate community assets score
community_needs_lad_assets = community_needs %>%
left_join(ward_lad, by = c("Ward Code" = "WD17CD")) %>%
left_join(lad_17_19, by = "LAD17CD") %>%
mutate(Quintile = calc_risk_quantiles(`Civic Assets rank`, quants = 5)) %>%
# label wards by whether they're in worst-score category then summarise by this label
mutate(Worst = ifelse(Quintile == 5, "Worst", "Other")) %>%
tabyl(LAD19CD, Worst) %>%
# calculate proportion of worst-score areas
mutate(`Proportion of wards with worst Community Assets scores` = Worst / (Worst + Other)) %>%
select(-Other, -Worst) %>%
na.omit()
# aggregate engagement score
community_needs_lad_engagement = community_needs %>%
left_join(ward_lad, by = c("Ward Code" = "WD17CD")) %>%
left_join(lad_17_19, by = "LAD17CD") %>%
mutate(Quintile = calc_risk_quantiles(`Engaged community rank`, quants = 5)) %>%
# label wards by whether they're in worst-score category then summarise by this label
mutate(Worst = ifelse(Quintile == 5, "Worst", "Other")) %>%
tabyl(LAD19CD, Worst) %>%
# calculate proportion of worst-score areas
mutate(`Proportion of wards with worst Engagement scores` = Worst / (Worst + Other)) %>%
select(-Other, -Worst) %>%
na.omit()
# aggregate connectedness score
community_needs_lad_connectedness = community_needs %>%
left_join(ward_lad, by = c("Ward Code" = "WD17CD")) %>%
left_join(lad_17_19, by = "LAD17CD") %>%
mutate(Quintile = calc_risk_quantiles(`Connectedness rank`, quants = 5)) %>%
# label wards by whether they're in worst-score category then summarise by this label
mutate(Worst = ifelse(Quintile == 5, "Worst", "Other")) %>%
tabyl(LAD19CD, Worst) %>%
# calculate proportion of worst-score areas
mutate(`Proportion of wards with worst Connectedness scores` = Worst / (Worst + Other)) %>%
select(-Other, -Worst) %>%
na.omit()
# aggregate left-behind areas
community_needs_lad_left_behind_areas = community_needs %>%
left_join(ward_lad, by = c("Ward Code" = "WD17CD")) %>%
left_join(lad_17_19, by = "LAD17CD") %>%
tabyl(LAD19CD, left_behind) %>%
rename(`Non left-behind areas` = `0`, `Left-behind areas` = `1`) %>%
mutate(`Proportion of wards that are left-behind areas` = `Left-behind areas` / (`Left-behind areas` + `Non left-behind areas`)) %>%
na.omit()
# merge all aggregated ranks
community_needs_lad = community_needs_lad_assets %>%
left_join(community_needs_lad_engagement, by = "LAD19CD") %>%
left_join(community_needs_lad_connectedness, by = "LAD19CD") %>%
left_join(community_needs_lad_cmi, by = "LAD19CD") %>%
left_join(community_needs_lad_left_behind_areas, by = "LAD19CD")
write_csv(community_needs_lad, "data/community-needs-LA.csv")
# make a geojson of wards - Wards (December 2017) Generalised Clipped Boundaries in UK (WGS84)
wards = read_sf("https://opendata.arcgis.com/datasets/7193daa99995445aa84a0b23352e56a1_2.geojson")
wards %>%
left_join(community_needs_ward, by = c("wd17cd" = "Code")) %>%
filter(!is.na(`Community Needs Index rank`)) %>%
mutate(`Community Needs Index decile` = calc_risk_quantiles(`Community Needs Index rank`, quants = 10)) %>%
write_sf("data/community-needs-ward.geojson")
rm(index_scores, index_domains, left_behind, data.dir.cmi, ward_lad,
n_wards, lad_17_19, community_needs, community_needs_lad_assets, community_needs_lad_engagement, community_needs_lad_cmi)