#' Assemble the cleaned date columns for each report
#'
#' Joins the raw report list to the reporting schedule (on `year` and
#' `doc_name`) and to the cover-page dates (on `report_id`), blanks out
#' implausible dates, and then discards received dates that are not on or
#' after at least one of the available start dates.
#'
#' @param report_list_raw Data frame of reports. It must carry the join keys
#'   `year` and `doc_name`; together with the result of `reporting_schedule()`
#'   it must provide `report_id`, `sboe_id`, `amended`, `received_image`,
#'   `received_data` and `sboe_start_date`.
#' @param cover_raw Data frame of cover pages with the columns `report_id`,
#'   `date_from`, `date_to` and `date_filed`.
#' @return A data frame with the identifying columns, the `received_*`
#'   columns, the schedule start/end date columns and the `cover_*` date
#'   columns.
process_report_dates <- function(report_list_raw, cover_raw) {
  # Project to the needed columns *before* de-duplicating: cover rows that
  # differ only in columns dropped here would otherwise survive `distinct()`
  # and multiply report rows in the join below.
  cover_dates <- cover_raw |>
    select(
      report_id,
      cover_start_date = date_from,
      cover_end_date = date_to,
      cover_date_filed = date_filed
    ) |>
    distinct()

  report_list_raw |>
    left_join(reporting_schedule(), by = join_by(year, doc_name)) |>
    select(
      report_id,
      sboe_id,
      year,
      doc_name,
      amended,
      contains("received_"),
      matches("(sboe_)?(start|end)_date")
    ) |>
    left_join(cover_dates, by = "report_id") |>
    # NOTE(review): assumes every column matched here is a Date -- confirm.
    mutate(across(matches("received|date"), na_if_obviously_wrong_date)) |>
    mutate(
      # If the received date isn't on or after at least one of the schedule
      # or cover start dates, don't believe it
      received_image = received_image |>
        na_if_not_after_one_of(sboe_start_date, cover_start_date),
      received_data = received_data |>
        na_if_not_after_one_of(sboe_start_date, cover_start_date)
    )
}

#' Blank out dates outside a plausible window
#'
#' @param x A vector of dates.
#' @param earliest Dates strictly before this become `NA`. Defaults to
#'   2016-01-01.
#' @param latest Dates strictly after this become `NA`. Defaults to today.
#' @return `x` with out-of-window entries replaced by `NA`; entries that were
#'   already `NA` stay `NA`.
na_if_obviously_wrong_date <- function(x,
                                       earliest = ymd("2016-01-01"),
                                       latest = today()) {
  # `!is.na(x)` keeps the mask free of NA so the assignment is unambiguous.
  implausible <- !is.na(x) & (x < earliest | x > latest)
  # Assigning a bare NA through `[<-` keeps the class of `x`.
  x[implausible] <- NA
  x
}

#' Keep a date only if it is on or after at least one reference date
#'
#' @param x A vector of dates.
#' @param ... Reference date vectors, each the same length as `x` or
#'   length 1.
#' @return `x` with every entry set to `NA` unless it is `>=` the
#'   corresponding entry of at least one reference vector. A comparison
#'   involving `NA` counts as "not on or after". With no reference vectors,
#'   nothing can vouch for `x`, so every entry becomes `NA`.
na_if_not_after_one_of <- function(x, ...) {
  references <- list(...)

  # TRUE where `x` is on or after `ref`; unknown comparisons count as FALSE.
  on_or_after <- function(ref) {
    hit <- x >= ref
    hit[is.na(hit)] <- FALSE
    hit
  }

  # Seeding the fold with all-FALSE makes the zero-reference case well
  # defined instead of an "empty input" error.
  supported <- Reduce(`|`, lapply(references, on_or_after), rep(FALSE, length(x)))

  x[!supported] <- NA
  x
}

#' Midpoint of two scalar dates, ignoring a missing one
#'
#' @param x,y Single dates.
#' @return `NA_Date_` when both are missing; otherwise the mean of the
#'   non-missing values (the other date itself when one is missing).
mean_date_scalar <- function(x, y) {
  both_missing <- is.na(x) && is.na(y)
  if (both_missing) {
    return(NA_Date_)
  }

  mean(c(x, y), na.rm = TRUE)
}