---
title: PROBLEMS
author: Garrick Aden-Buie
format: pdf
execute:
  echo: true
---

## Setup

```{r}
library(tidyverse)
library(fs)
pkgload::load_all(here::here("process"))
```

```{r load-data}
cf <- cf_prep_db_create()
```

```{r load-data-report_list}
# Pull the report list into memory and convert `doc_name` with the project
# helper `as_report_factor()`.
report_list <- cf$report_list |>
  collect() |>
  mutate(across(doc_name, as_report_factor))
```

### Problem scoping

These summaries help determine the size of the problem.

```{r}
# Row count and total amount per report. The full join against every
# `report_id` in `report_list`, followed by `replace_na()`, makes reports
# without any matching rows show up with zeros instead of being absent.
expenditures_by_report <- cf$expenditures |>
  summarize(
    n_expenses = n(),
    total_expenses = sum(amount),
    .by = report_id
  ) |>
  collect() |>
  full_join(report_list["report_id"], by = "report_id") |>
  replace_na(list(n_expenses = 0, total_expenses = 0))

receipts_by_report <- cf$receipts |>
  summarize(
    n_receipts = n(),
    total_receipts = sum(amount),
    .by = report_id
  ) |>
  collect() |>
  full_join(report_list["report_id"], by = "report_id") |>
  replace_na(list(n_receipts = 0, total_receipts = 0))
```

## Doc search problems

```{r}
# Compare the report type implied by the report list (`year` + `doc_name`)
# with the report type recorded on the cover.
report_cover_report_type <- report_list |>
  mutate(report_type_listed = paste(year, doc_name)) |>
  select(report_id, sboe_id, report_type_listed) |>
  left_join(
    cf$cover |>
      select(sboe_id, report_id, report_type_cover = report_type) |>
      collect(),
    # Explicit keys: the two columns shared by both sides.
    by = c("report_id", "sboe_id")
  )

report_cover_report_type |>
  count(report_type_listed == report_type_cover)

# NOTE: `!=` yields NA when either side is NA, and `filter()` drops those
# rows, so reports without a cover report type are not listed here.
report_cover_report_type |>
  filter(report_type_listed != report_type_cover)
```

```{r}
# Attach per-report expenditure and receipt totals to the mismatches.
report_cover_report_type |>
  filter(report_type_listed != report_type_cover) |>
  left_join(expenditures_by_report, by = "report_id") |>
  left_join(receipts_by_report, by = "report_id") |>
  arrange(total_receipts)
```

In some of these cases, the cover is probably wrong:

```{r}
# Look up which scheduled report type matches the cover's date range.
report_cover_report_type |>
  filter(report_type_listed != report_type_cover) |>
  left_join(
    cf$cover |>
      select(report_id, date_from, date_to) |>
      collect(),
    by = "report_id"
  ) |>
  left_join(
    reporting_schedule() |>
      mutate(report_type_sched = paste(year, doc_name)) |>
      select(report_type_sched, sboe_start_date, sboe_end_date),
    by = c(date_from = "sboe_start_date", date_to = "sboe_end_date")
  )
```

## Dates

```{r}
# Namespaced call: {targets} is not attached by the library() calls in the
# setup chunk.
report_dates <- targets::tar_read(
  report_dates,
  store = here::here("process/_targets")
)
```

```{r}
report_dates |> filter(sboe_start_date != cover_start_date) # 3,422
report_dates |> filter(sboe_end_date != cover_end_date) # 590
report_dates |> filter(received_image < cover_start_date) # 60
report_dates |> filter(received_image < cover_end_date) # 222
report_dates |> filter(received_data < cover_start_date) # 2
report_dates |> filter(received_data < cover_end_date) # 45
report_dates |> filter(cover_date_filed < cover_end_date) # 950
```

## Picking amended

Picking the correct amended report is problematic because no date in the `report_list` can really be trusted.

### Interestingly problematic

```{r}
# STA-C3235N-C-001 2017 Year End Semi-Annual
# WAK-56BLZN-C-001 2020 Mid Year Semi-Annual CITIZENS FOR TOMMY MATTHEWS
# STA-Z6M8TR-C-001 2017 Year End Semi-Annual FIREFIGHTERS FOR RESPON
```