Преглед на файлове

out: candidate listing and officers

main
Garrick Aden-Buie преди 2 години
родител
ревизия
0a13f0ca90
No known key found for this signature in database
променени са 5 файла, в които са добавени 78 реда и са изтрити 46 реда
  1. +5
    -4
      process/R/out_candidate_listing.R
  2. +17
    -0
      process/R/out_officers.R
  3. +12
    -12
      process/R/prepare_candidates.R
  4. +33
    -23
      process/_targets.R
  5. +11
    -7
      process/_targets/meta/meta

+ 5
- 4
process/R/out_candidate_listing.R Целия файл

@@ -144,7 +144,7 @@ prep_candidate_listing <- function(
# candidate_name_on_ballot ----
candidate_name_on_ballot <-
cl_raw |>
distinct(candidate_id, election_dt, name_on_ballot) |>
distinct(candidate_id, election_dt, name_on_ballot, first_name, middle_name, last_name, name_suffix_lbl) |>
arrange(candidate_id, election_dt)

# candidate_address ----
@@ -174,7 +174,7 @@ prep_candidate_listing <- function(
# Extract contests (remaining data in candidate_listing) ----
cols_related <- setdiff(
c(colnames(candidate_name_on_ballot), colnames(candidate_address), colnames(candidate_party)),
c("election_dt", "candidate_id")
c("election_dt", colnames(candidate_names))
)

contests <-
@@ -197,7 +197,7 @@ prep_candidate_listing <- function(
current_name_on_ballot <-
candidate_name_on_ballot |>
slice_max(election_dt, by = candidate_id, n = 1, with_ties = FALSE) |>
select(-election_dt)
select(candidate_id, name_on_ballot)

current_party <-
candidate_party |>
@@ -247,7 +247,8 @@ prep_candidate_listing <- function(
relocate(name_on_ballot, .before = first_name) |>
relocate(starts_with("party"), .before = street) |>
relocate(starts_with("contest"), .before = street) |>
add_address_lookup_local(street = street, postal_code = zip_code)
add_address_lookup_local(street = street, postal_code = zip_code) |>
arrange(candidate_id)

# Return list of tables
list(

+ 17
- 0
process/R/out_officers.R Целия файл

@@ -0,0 +1,17 @@
#' Write the officers table to a parquet file
#'
#' Opens the prepared officers dataset, keeps only the rows whose
#' `report_id` also appears in the report list, renames the `address`
#' column to `address_lookup`, drops duplicate rows and writes the result
#' to `data-out/officers/officers.parquet`.
#'
#' @param path_data_prep_officers Passed to `prep_open_dataset_db()` to
#'   open the prepared officers data.
#' @param path_out_report_list Passed to `out_open_dataset_db()` to open
#'   the report list used as the filter.
#' @return The directory containing the written parquet file, as returned
#'   by `dirname()`.
out_officers <- function(path_data_prep_officers, path_out_report_list) {
  # Make sure the destination directory exists before any data is read.
  officers_file <- path("data-out", "officers", "officers.parquet")
  dir_create(path_dir(officers_file))

  reports <- out_open_dataset_db(path_out_report_list)
  officers_all <- prep_open_dataset_db(path_data_prep_officers)

  # Restrict to officers attached to a report present in the report list.
  officers_kept <- semi_join(officers_all, reports, by = "report_id")
  officers_kept <- rename(officers_kept, address_lookup = address)

  # De-duplicate first, then collect() the result for writing.
  officers_tbl <- collect(distinct(officers_kept))

  arrow::write_parquet(officers_tbl, officers_file)

  # Return the output directory rather than the file itself.
  dirname(officers_file)
}

+ 12
- 12
process/R/prepare_candidates.R Целия файл

@@ -63,7 +63,7 @@ prepare_candidate_listing_for_matching <- function(candidate_listing) {
lg_info_target(lg_get_logger())

candidate_name <-
candidate_listing$candidates |>
candidate_listing$cl_candidates |>
mutate(
name_full = paste(first_name, middle_name, last_name, name_suffix_lbl),
name_mi = paste(first_name, substr(middle_name, 1, 1), last_name, name_suffix_lbl),
@@ -74,18 +74,22 @@ prepare_candidate_listing_for_matching <- function(candidate_listing) {
select(1, name_clean)

candidate_alias <-
candidate_listing$candidate_name_on_ballot |>
candidate_listing$cl_name_on_ballot |>
distinct(candidate_id, name_clean = name_on_ballot) |>
mutate(
name_clean = toupper(name_clean),
name_clean = gsub("[,.]", "", name_clean)
)

bind_rows(candidate_name, candidate_alias) |>
mutate(name_clean = trimws(name_clean)) |>
candidate_aka <-
candidate_alias |>
filter(grepl("[(].+[)]", name_clean)) |>
mutate(name_clean = sub(".+? \\((.+?)\\) (.+)$", "\\1 \\2", name_clean))

bind_rows(candidate_name, candidate_alias, candidate_aka) |>
mutate(name_clean = stringr::str_squish(name_clean)) |>
left_join(
candidate_listing$candidate_contact |>
select(1, street:zip_code),
candidate_listing$cl_contact |> select(1, street:zip_code),
by = "candidate_id",
relationship = "many-to-many"
) |>
@@ -119,13 +123,9 @@ fastlink_match_candidates <- function(
threshold.match = 0.8
)

names(matches)[ncol(candidates_for_matching) + 1] <- "name_on_ballot"
matches <- as_tibble(matches)

candidates_for_matching |>
left_join(
matches |> select(sboe_id, name_on_ballot),
by = join_by(sboe_id == sboe_id)
)
select(matches, sboe_id, candidate_id)
}

candidates_match <- function(

+ 33
- 23
process/_targets.R Целия файл

@@ -194,6 +194,16 @@ list(
# fastlink_donor_blocks(donor_blocks_name, em.obj = donors_em_model)
# ),

# Candidate Listing -------------------------------------------------------
tar_target(
candidate_listing_dedupe,
prep_dedupe_candidates(candidate_listing_raw)
),

tar_target(
candidate_listing,
prep_candidate_listing(candidate_listing_raw, candidate_listing_dedupe)
),

# Candidates --------------------------------------------------------------
tar_target(
@@ -204,34 +214,22 @@ list(
candidate_listing_for_matching,
prepare_candidate_listing_for_matching(candidate_listing)
),
# tar_target(
# candidates_linked,
# fastlink_candidates(
# candidates_for_matching,
# candidate_listing_for_matching
# )
# ),
# tar_target(
# candidates_matched,
# fastlink_match_candidates(
# candidates_for_matching,
# candidate_listing_for_matching,
# candidates_linked
# )
# ),

# Candidate Listing -------------------------------------------------------
tar_target(
candidate_listing_dedupe,
prep_dedupe_candidates(candidate_listing_raw)
candidates_linked,
fastlink_candidates(
candidates_for_matching,
candidate_listing_for_matching
)
),

tar_target(
candidate_listing,
prep_candidate_listing(candidate_listing_raw, candidate_listing_dedupe)
committee_candidate,
fastlink_match_candidates(
candidates_for_matching,
candidate_listing_for_matching,
candidates_linked
)
),


# Output ------------------------------------------------------------------
tar_target(path_out_report_list, out_report_list(report_list), format = "file"),

@@ -256,6 +254,12 @@ list(
format = "file"
),

tar_target(
path_out_officers,
out_officers(path_data_prep_officers, path_out_report_list),
format = "file"
),

tar_target(
path_out_expenses_payee,
out_expenses_payee(
@@ -322,6 +326,12 @@ list(
path_out_cl_party,
out_write_parquet(candidate_listing$cl_party, "cl_party"),
format = "file"
),

tar_target(
path_out_committee_candidate,
out_write_parquet(committee_candidate, "committee_candidate"),
format = "file"
)

)

+ 11
- 7
process/_targets/meta/meta Целия файл

@@ -9,16 +9,16 @@ addresses_raw|stem|741a8246ede29a98|dabddb7fe8887c09|e28e5c2d6b58ccbe|-140902334
amended_score|function|5c61b95fed292706|||||||||||||||
as_report_factor|function|36d1129ba7988043|||||||||||||||
calc_report_amended_score|function|59ad5afb0d7afcdd|||||||||||||||
candidate_listing|stem|9314768e893e1671|ca3b1051baab2a4e|8709dd2ae021a8e0|1990518913||t19707.5809075088s|b5526baf434fb83e|2319372|rds|local|vector|||15.289|Detected an unexpected manytomany relationship between x and y.ℹ Row 46 of x matches multiple rows in y.ℹ Row 1 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.. Detected an unexpected manytomany relationship between x and y.ℹ Row 14490 of x matches multiple rows in y.ℹ Row 68 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.|
candidate_listing|stem|2d29206012dd5a6e|ca3b1051baab2a4e|b1b10169f22e7895|1990518913||t19708.0958038563s|919bddc5b0c264cd|2443426|rds|local|vector|||17.819|Detected an unexpected manytomany relationship between x and y.ℹ Row 46 of x matches multiple rows in y.ℹ Row 1 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.. Detected an unexpected manytomany relationship between x and y.ℹ Row 14490 of x matches multiple rows in y.ℹ Row 68 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.|
candidate_listing_current_contact_info|function|e601167e7641e8b3|||||||||||||||
candidate_listing_dedupe|stem|a26b36222b5573b7|abe88f559f4615fa|bc99c62e886d7ea2|-464242793||t19707.550437869s|c4aec4062028f459|887493|rds|local|vector|||302.363||
candidate_listing_for_matching|stem|4f3d4790b862e5ea|6fdfaef3d1f25c51|91357c5fb6dffb47|-577355067||t19702.5582565998s|674e7fd4b2bbb2e7|752816|rds|local|vector|||0.894||
candidate_listing_for_matching|stem|a6d16521616c1897|6fdfaef3d1f25c51|81b8064552ef57ff|-577355067||t19708.095820139s|68e20938d3183134|750771|rds|local|vector|||0.385||
candidate_listing_raw|stem|04ff27c1189c6935|97f430d4b14b167a|0e0ef311e96b6957|1186188557||t19703.0853911025s|5a86d1a43431a527|1440919|rds|local|vector|||8.573||
candidates|stem|b6768405150404c4|71001cadf25750e1|de8a72a396af78b4|1152396829||t19694.6016087732s|b322e3837fc533af|116166|rds|local|vector|||4.115||
candidates_for_matching|stem|1cb7ac6f316c807e|42bcfac35d712093|4bfa9403fc1a794d|968106745||t19701.6598809345s|727e7a16809d8693|115250|rds|local|vector|||6.81||
candidates_linked|stem|747e919af78df023|abcd351c0f8d6c0b|5589dd87808bbe82|-1922179846||t19701.6254475891s|85faa40d76c2db7d|16994|rds|local|vector|||163.128||
candidates_linked|stem|33fe2f1f5d379255|abcd351c0f8d6c0b|1584da0a43ae0cc9|-1922179846||t19708.0984197888s|03a18d8529739634|21291|rds|local|vector|||224.279|partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs. partial argument match of p to probs|
candidates_match|function|81d6299a59c8c42b|||||||||||||||
candidates_matched|stem|5bd3f145a9843a77|20bae987c9ff2e92|b1db5931f1540e60|-250178612||t19701.6254489813s|0038749672d5215d|141371|rds|local|vector|||0.084|Detected an unexpected manytomany relationship between x and y.ℹ Row 1 of x matches multiple rows in y.ℹ Row 824 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.|
candidates_matched|stem|c6a2275e8b56b2d6|20bae987c9ff2e92|c48ac389be1bcc8c|-250178612||t19708.0140315185s|3132e98bbdd491d9|134791|rds|local|vector|||0.12|Detected an unexpected manytomany relationship between x and y.ℹ Row 9 of x matches multiple rows in y.ℹ Row 1117 of y matches multiple rows in x.ℹ If a manytomany relationship is expected, set relationship manytomany to silence this warning.|
cf_db_create|function|b4ebd44c9466fbf2|||||||||||||||
cf_open_dataset|function|c6c59997ba5529dd|||||||||||||||
cf_open_dataset_db|function|45e63b4c150c3213|||||||||||||||
@@ -26,6 +26,7 @@ cf_prep_db_create|function|59e20e4427ec3eb6|||||||||||||||
cf_root|function|e7a66c4131d887c2|||||||||||||||
cluster_text|function|0585ec2a10dc9332|||||||||||||||
collect_full_addresses_from_parts|function|4036d15cdad60ede|||||||||||||||
committee_candidate|stem|4c5964268c5a6b73|20bae987c9ff2e92|2bf6cf9927c631e2|1681574645||t19708.0984234528s|70e83a306ad4a18a|23435|rds|local|vector|||0.028||
committees|stem|fa467377df6b1326|672152cfa41781f1|5bf117c67cc27f88|-828971872||t19694.0609098714s|29dc1bf83a2d1a54|153990|rds|local|vector|||1.276||
count_receipt_types|function|ea6b25d6ab3486f6|||||||||||||||
cover_raw|stem|9ae721aa4d4398d6|aed65a62609940a9|b5913a9bada6de77|2002318961||t19693.830029799s|3fe0e852265c13f9|650023|rds|local|vector|||4.549||
@@ -14136,7 +14137,7 @@ fastlink_candidates|function|5988ea33a4aa0fa3|||||||||||||||
fastlink_donor_blocks|function|a30cbc80b845de50|||||||||||||||
fastlink_donors|function|08a89210e0c7bf6d|||||||||||||||
fastlink_donors_em_model|function|91e538caa0d306a9|||||||||||||||
fastlink_match_candidates|function|9d3cdd3509894297|||||||||||||||
fastlink_match_candidates|function|8bf2ce070e1adcee|||||||||||||||
fix_sboe_id_missing|function|11f3d67b770cc3c5|||||||||||||||
fixup_po_box|function|f0c20aefb0937e2c|||||||||||||||
fixup_po_box_query|function|cbc3dceca5ad0a04|||||||||||||||
@@ -14158,6 +14159,7 @@ out_cover|function|489d09fddabd6a80|||||||||||||||
out_cover_committees|function|c03f2256017d8ddf|||||||||||||||
out_expenses|function|a03f4eb311b925e3|||||||||||||||
out_expenses_payee|function|5da7294992aaada4|||||||||||||||
out_officers|function|a646c99e1914b811|||||||||||||||
out_open_dataset|function|26ace22440bb5c7d|||||||||||||||
out_open_dataset_db|function|172328361bbc3030|||||||||||||||
out_receipts|function|cedde63c692e591f|||||||||||||||
@@ -23537,6 +23539,7 @@ path_out_cl_candidates|stem|c42483579aa7b792|530d2b80d22daa14|86567a204690b38d|-
path_out_cl_contact|stem|c2cf5470d3f92c0d|677f4afded3f56a0|86567a204690b38d|702194598|data-out/cl_contact|t19707.5829723825s|bf47e23aeffb5eb4|1102303|file|local|vector|||0.911||
path_out_cl_elections|stem|293e98ff84ee8949|c937aeb5c46a5ebd|86567a204690b38d|-188564591|data-out/cl_elections|t19707.5809144635s|41200f7f0df0ebd3|96|file|local|vector|||0.421||
path_out_cl_party|stem|3aa7322eb580a25a|7714628695a675a4|86567a204690b38d|-915641666|data-out/cl_party|t19707.5829736543s|484403ec7a88dcb0|50035|file|local|vector|||0.91||
path_out_committee_candidate|stem|2083ce60a4e01274|2dee0e4fbe71a657|33b8518d970c6e3c|-2075049413|data-out/committee_candidate|t19708.1274237369s|782218210c653499|40468|file|local|vector|||0.75||
path_out_committees|stem|e1db1943cc188990|c66a978b2fec9710|9df75ef576032d57|2056595834|data-out/committees|t19701.622756401s|d6a0dc2435f1989d|518228|file|local|vector|||0.613||
path_out_cover|stem|e194b997ce1b6cec|88239f9749ae64db|a3967a55274f63c9|1148338993|data-out/cover|t19701.6227471479s|113bcabb426cb5cb|849188|file|local|vector|||4.696||
path_out_elections|stem|5d18de4e325fb889|e3b344e58c525754|15a1dcca21694ff8|466215059|data-out/elections|t19702.073721828s|41200f7f0df0ebd3|96|file|local|vector|||0.982||
@@ -23546,6 +23549,7 @@ path_out_elections_candidates_contact|stem|9cb2dfe531d53104|78e17f505a68f7c7|15a
path_out_elections_candidates_party|stem|813f0d54d0425d21|3ffa62ada21b0f71|15a1dcca21694ff8|-1097469914|data-out/elections_candidates_party|t19702.0737206588s|41200f7f0df0ebd3|96|file|local|vector|||0.937||
path_out_expenses|stem|16b7852d04a9d6f6|223af208242981b5|fc57167eecd9ce46|-1017777136|data-out/expenses|t19701.6412139288s|41200f7f0df0ebd3|96|file|local|vector|||5.155||
path_out_expenses_payee|stem|4cb050bbb617e492|3ff4100baee20a6e|cfa87db5688cd968|-1238337025|data-out/expenses_payee|t19701.6411488082s|41200f7f0df0ebd3|96|file|local|vector|||6.547||
path_out_officers|stem|941fac280cf46c1a|36f0ce79ab45821a|47112a4e307a539c|-380620045|data-out/officers|t19708.545062986s|9942a1dd50198d76|1052800|file|local|vector|||4.537||
path_out_receipts|stem|3eaa7d0b5203fdbd|e117bdc72f7615a4|6f11610e46ee3e89|1385463881|data-out/receipts|t19701.6648095064s|96231e39016c77f9|55579720|file|local|vector|||10.944||
path_out_receipts_payer|stem|e7c063261174c6e1|e723fed8a41b4a5e|be8e59e8300a2f73|249743415|data-out/receipts_payer|t19701.6646793546s|63d7b22fe19cc423|51602128|file|local|vector|||11.379||
path_out_report_list|stem|e7c9028eec6020d0|c316e6d7a47c17b8|0dba393264ec3b40|1961790994|data-out/report_list|t19699.1541422243s|41200f7f0df0ebd3|96|file|local|vector|||0.744||
@@ -42278,7 +42282,7 @@ post_prepare_steps_for_table|function|cab3f88eebeb2f87|||||||||||||||
post_process_steps_for_table|function|cab3f88eebeb2f87|||||||||||||||
pre_prepare_table_body|function|437b34d31f233545|||||||||||||||
pre_process_table_body|function|437b34d31f233545|||||||||||||||
prep_candidate_listing|function|ef216a9ee11971d6|||||||||||||||
prep_candidate_listing|function|6d068fc765edf48b|||||||||||||||
prep_candidates_dedupe_mapping|function|fc219b85b321db70|||||||||||||||
prep_collect_addresses_raw|function|9d8af27d4d039c5b|||||||||||||||
prep_collect_addresses_raw_officers|function|748d8b26ffdd0273|||||||||||||||
@@ -42289,7 +42293,7 @@ prep_open_dataset_db|function|e85403c13f0b7ceb|||||||||||||||
prepare_addresses_create_db|function|c63b3e88c5d630f8|||||||||||||||
prepare_addresses_lookup_db|function|819826fcfedfa916|||||||||||||||
prepare_candidate_listing|function|0d6a3d7459cb56e6|||||||||||||||
prepare_candidate_listing_for_matching|function|874c69cd4a77df07|||||||||||||||
prepare_candidate_listing_for_matching|function|104a95749d67d9fe|||||||||||||||
prepare_candidates|function|27638fe3f0dc4f27|||||||||||||||
prepare_candidates_for_matching|function|2ee229f69e88f999|||||||||||||||
prepare_candidates_raw|function|648568ed67cd66ba|||||||||||||||

Loading…
Отказ
Запис