-
Notifications
You must be signed in to change notification settings - Fork 0
/
Get_data.R
64 lines (52 loc) · 1.84 KB
/
Get_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
library(stringr)
source("Request_and_parse.R")
# Produce dataframe for specified norm between dates ------------
# Build a data frame of norms of the given `type` for the dates between
# `start` and `end`.
#
# Arguments:
#   type        Norm type; forwarded to request_norm_dates() and
#               retry_request().
#   start, end  Bounds of the date range; forwarded to dates_url().
#   date_format Format of `start`/`end`; forwarded to dates_url().
#   write       If TRUE, the result is also written as a CSV under "Data/",
#               named after `type` and the first/last Month of the result.
#
# Returns a data frame with the columns Month, Type, Number, Title, Type2 and
# URL, ordered by Type and Month, with default row names.
df_norms <- function(type, start, end, date_format = "%d-%m-%Y",
                     write = FALSE) {
  dates <- dates_url(
    start_date = start, end_date = end, date_format = date_format
  )
  df_aux <- request_norm_dates(type, dates)

  # Rows with an NA Number are handed to retry_request(), identified by their
  # row names.
  missing <- which(is.na(df_aux$Number))
  if (length(missing) > 0) {
    missing_dates <- row.names(df_aux)[missing]
    df_norms_comp <- retry_request(df_aux, type, missing_dates)
  } else {
    df_norms_comp <- df_aux
  }

  # The date is recovered from the "fechapro2=" part of each row name.
  # str_extract() (rather than str_extract_all()) yields a plain character
  # vector, with NA where the row name has no match.
  month_raw <- str_extract(rownames(df_norms_comp), "(?<=fechapro2=)[0-9%F]+")
  df_norms_comp["Month"] <- as.Date(
    str_replace_all(month_raw, "%2F", "-"),
    "%d-%m-%Y"
  )

  # Keep a row when its value in column 1 or in column 6 appears for the
  # first time.
  # NOTE(review): columns are addressed by position here; confirm which
  # fields are at positions 1 and 6 in the output of request_norm_dates().
  keep <- !duplicated(df_norms_comp[, 1]) | !duplicated(df_norms_comp[, 6])
  df_comp_nodupl <- df_norms_comp[keep, ]

  row_order <- order(df_comp_nodupl$Type, df_comp_nodupl$Month)
  df_comp_nodupl <- df_comp_nodupl[
    row_order,
    c("Month", "Type", "Number", "Title", "Type2", "URL")
  ]

  # Drop rows with an empty Title. `%in%` never returns NA, so a row whose
  # Title is NA is kept as is instead of being turned into an all-NA row
  # (which is what indexing with `Title == ""` does).
  df_comp_nodupl <- df_comp_nodupl[!(df_comp_nodupl$Title %in% ""), ]
  rownames(df_comp_nodupl) <- NULL

  if (write == TRUE) {
    write.csv(df_comp_nodupl, paste0(
      "Data/", type, "_", df_comp_nodupl$Month[1], "_",
      df_comp_nodupl$Month[nrow(df_comp_nodupl)], ".csv"
    ), row.names = FALSE)
  }

  df_comp_nodupl
}
# Update function ------------
# Extend an existing norms data frame with the norms published after its
# latest Month, up to `end`.
#
# Arguments:
#   df          Existing data frame with a `Month` column that as.Date() can
#               convert.
#   type        Norm type; forwarded to df_norms().
#   end         End of the new date range; forwarded to df_norms().
#   date_format Date format forwarded to df_norms().
#   write       If TRUE, the combined data frame is also written as a CSV
#               under "Data/", named after `type` and the first/last Month.
#
# Returns `df` with the newly requested rows appended below it.
update_request <- function(df, type, end, date_format = "%d-%m-%Y",
                           write = FALSE) {
  df$Month <- as.Date(df$Month)

  # Resume the day after the latest known date. Taking max() of the Month
  # column ignores NA dates and does not depend on Month being the first
  # column of `df`.
  if (all(is.na(df$Month))) {
    stop("`df` has no valid `Month` value to update from.", call. = FALSE)
  }
  start <- max(df$Month, na.rm = TRUE) + 1

  # The update itself is never written on its own; only the combined data
  # frame is, and only when `write` is TRUE.
  df_update <- df_norms(
    type, start, end,
    date_format = date_format, write = FALSE
  )
  df_comp_nodupl <- rbind.data.frame(df, df_update, stringsAsFactors = FALSE)

  if (write == TRUE) {
    write.csv(df_comp_nodupl, paste0(
      "Data/", type, "_", df_comp_nodupl$Month[1], "_",
      df_comp_nodupl$Month[nrow(df_comp_nodupl)], ".csv"
    ), row.names = FALSE)
  }

  df_comp_nodupl
}