| @@ -2,6 +2,10 @@ | |||
| export("%>%") | |||
| export(backup_tweets) | |||
| export(gathertweet_search) | |||
| export(gathertweet_simplify) | |||
| export(gathertweet_timeline) | |||
| export(gathertweet_update) | |||
| export(get_user_info) | |||
| export(install_gathertweet) | |||
| export(last_seen_tweet) | |||
| @@ -0,0 +1,152 @@ | |||
| #' @title gathertweet actions | |||
| #' @description Search for tweets matching each element of `terms` with | |||
| #'   `rtweet::search_tweets()` and store the combined results in `file`. | |||
| #' @param terms Character vector of search queries. Each element is sent to | |||
| #'   `rtweet::search_tweets()` as a separate query. | |||
| #' @param file Path of the RDS file of stored tweets. Passed to | |||
| #'   `last_seen_tweet()` and `save_tweets()`, or written directly with | |||
| #'   `saveRDS()` when `no-parse` is `TRUE`. | |||
| #' @param n Number of tweets requested per term; coerced with `as.integer()`. | |||
| #' @param max_id Passed to `rtweet::search_tweets()`. When it is not `NULL`, | |||
| #'   `since_id` is ignored. | |||
| #' @param since_id `"last"` uses the value returned by `last_seen_tweet()` | |||
| #'   for `file`; `"none"` or `NULL` applies no lower bound; any other value | |||
| #'   is passed to `rtweet::search_tweets()` unchanged. | |||
| #' @param type,include_rts,geocode,token,retryonratelimit Passed unchanged | |||
| #'   to `rtweet::search_tweets()`. | |||
| #' @param no-parse If `TRUE`, results are requested with `parse = FALSE` and | |||
| #'   the list of per-term results is written to `file` with `saveRDS()`. | |||
| #' @param quiet `rtweet::search_tweets()` is called with | |||
| #'   `verbose = isFALSE(quiet)`. | |||
| #' @param ... Not used by this function. | |||
| #' @return With `no-parse = TRUE`, the list of per-term results; otherwise | |||
| #'   the value returned by `save_tweets()`. | |||
| #' @export | |||
| gathertweet_search <- function( | |||
|   terms, | |||
|   file = "tweets.rds", | |||
|   n = 18000, | |||
|   max_id = NULL, | |||
|   since_id = "last", | |||
|   type = "recent", | |||
|   include_rts = FALSE, | |||
|   geocode = NULL, | |||
|   `no-parse` = FALSE, | |||
|   token = NULL, | |||
|   retryonratelimit = FALSE, | |||
|   quiet = FALSE, | |||
|   ... | |||
| ) { | |||
|   log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"") | |||
|   # since_id only applies when no max_id is given; the `if` without an | |||
|   # `else` yields NULL otherwise. identical() is used instead of `==` so | |||
|   # that since_id = NULL falls through to "no lower bound" rather than | |||
|   # raising "argument is of length zero". | |||
|   since_id <- if (is.null(max_id)) { | |||
|     if (identical(since_id, "last")) { | |||
|       last_seen_tweet(file = file) | |||
|     } else if (identical(since_id, "none")) { | |||
|       NULL | |||
|     } else { | |||
|       since_id | |||
|     } | |||
|   } | |||
|   if (!is.null(since_id)) log_info("Tweets from {since_id}") | |||
|   if (!is.null(max_id)) log_info("Tweets up to {max_id}") | |||
|   # One request per search term; results are combined below. | |||
|   tweets <- lapply( | |||
|     terms, | |||
|     function(term) rtweet::search_tweets( | |||
|       q = term, | |||
|       n = as.integer(n), | |||
|       type = type, | |||
|       include_rts = include_rts, | |||
|       geocode = geocode, | |||
|       max_id = max_id, | |||
|       parse = isFALSE(`no-parse`), | |||
|       token = token, | |||
|       retryonratelimit = retryonratelimit, | |||
|       verbose = isFALSE(quiet), | |||
|       since_id = since_id | |||
|     ) | |||
|   ) | |||
|   if (isTRUE(`no-parse`)) { | |||
|     # Un-parsed results are stored as-is, without de-duplication. | |||
|     log_info("Saving un-parsed tweets in {file}") | |||
|     saveRDS(tweets, file) | |||
|   } else { | |||
|     tweets <- dplyr::bind_rows(tweets) | |||
|     if (nrow(tweets) == 0) { | |||
|       log_info("No new tweets.") | |||
|       # NOTE(review): exit() is defined elsewhere; presumably it ends the | |||
|       # run here -- confirm. | |||
|       exit() | |||
|     } | |||
|     # The same tweet can match several terms: keep one row per status_id. | |||
|     tweets <- tweets[!duplicated(tweets$status_id), ] | |||
|     tweets <- tweets[order(tweets$status_id), ] | |||
|     log_info("Gathered {nrow(tweets)} tweets") | |||
|     tweets <- save_tweets(tweets, file) | |||
|     log_info("Total of {nrow(tweets)} tweets in {file}") | |||
|   } | |||
|   tweets | |||
| } | |||
| #' Update stored tweets | |||
| #' | |||
| #' Calls `update_tweets()` for the tweets stored in `file` and writes the | |||
| #' result back with `save_tweets()`. | |||
| #' | |||
| #' @param file Path of the RDS file of stored tweets. | |||
| #' @param no-parse `update_tweets()` is called with | |||
| #'   `parse = isFALSE(no-parse)`. | |||
| #' @param token Passed to `update_tweets()`. | |||
| #' @param ... Not used by this function. | |||
| #' @return The value returned by `save_tweets()`. | |||
| #' @export | |||
| gathertweet_update <- function(file = "tweets.rds", `no-parse` = FALSE, token = NULL, ...) { | |||
|   logger("Updating tweets in {file}") | |||
|   file_missing <- !file.exists(file) | |||
|   if (file_missing) log_fatal("`{file}` does not exist") | |||
|   # parse and token are passed on to rtweet::lookup_statuses() | |||
|   parse_results <- isFALSE(`no-parse`) | |||
|   tweets <- update_tweets(file = file, parse = parse_results, token = token) | |||
|   log_debug("Status lookup returned {nrow(tweets)} tweets") | |||
|   # `tweets` is reassigned so the message below reports the stored total. | |||
|   tweets <- save_tweets(tweets, file) | |||
|   log_debug("Total of {nrow(tweets)} tweets in {file}") | |||
|   tweets | |||
| } | |||
| #' Gather user timelines | |||
| #' | |||
| #' Requests timelines for `users` with `rtweet::get_timeline()` and stores | |||
| #' the results in `file`. | |||
| #' | |||
| #' @param users Users whose timelines are requested; passed as `user` to | |||
| #'   `rtweet::get_timeline()`. | |||
| #' @param file Path of the RDS file of stored tweets. Passed to | |||
| #'   `save_tweets()`, or written directly with `saveRDS()` when `no-parse` | |||
| #'   is `TRUE`. | |||
| #' @param n Number of tweets requested; coerced with `as.integer()`. A | |||
| #'   warning is logged when it exceeds 3200, but the value is not capped. | |||
| #' @param max_id,token Passed unchanged to `rtweet::get_timeline()`. | |||
| #' @param home,include_rts Passed to `rtweet::get_timeline()` after | |||
| #'   `isTRUE()`. | |||
| #' @param no-parse If `TRUE`, results are requested with `parse = FALSE` and | |||
| #'   written to `file` with `saveRDS()` without further processing. | |||
| #' @param ... Not used by this function. | |||
| #' @return With `no-parse = TRUE`, the un-parsed result of | |||
| #'   `rtweet::get_timeline()`; otherwise the value returned by | |||
| #'   `save_tweets()`. | |||
| #' @export | |||
| gathertweet_timeline <- function( | |||
|   users, | |||
|   file = "tweets.rds", | |||
|   n = 3200, | |||
|   max_id = NULL, | |||
|   # NOTE(review): the CLI help describes --home as opt-in ("If included, | |||
|   # returns home-timeline instead of user-timeline"), yet the default here | |||
|   # is TRUE -- confirm which is intended. | |||
|   home = TRUE, | |||
|   `no-parse` = FALSE, | |||
|   token = NULL, | |||
|   include_rts = FALSE, | |||
|   ... | |||
| ) { | |||
|   log_info("Gathering tweets by {collapse(users)}") | |||
|   n <- as.integer(n) | |||
|   if (n > 3200) { | |||
|     log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user") | |||
|   } | |||
|   tweets <- rtweet::get_timeline( | |||
|     user = users, | |||
|     n = n, | |||
|     max_id = max_id, | |||
|     home = isTRUE(home), | |||
|     parse = isFALSE(`no-parse`), | |||
|     check = TRUE, | |||
|     token = token, | |||
|     include_rts = isTRUE(include_rts) | |||
|   ) | |||
|   if (isTRUE(`no-parse`)) { | |||
|     # Un-parsed results cannot be de-duplicated or ordered by the | |||
|     # data-frame code below, so store them as-is, as gathertweet_search() | |||
|     # does. | |||
|     log_info("Saving un-parsed tweets in {file}") | |||
|     saveRDS(tweets, file) | |||
|     return(tweets) | |||
|   } | |||
|   # Keep one row per status_id, ordered by status_id. | |||
|   tweets <- tweets[!duplicated(tweets$status_id), ] | |||
|   tweets <- tweets[order(tweets$status_id), ] | |||
|   log_info("Gathered {nrow(tweets)} tweets from {length(users)} users") | |||
|   tweets <- save_tweets(tweets, file) | |||
|   log_info("Total of {nrow(tweets)} tweets in {file}") | |||
|   tweets | |||
| } | |||
| #' Simplify stored tweets | |||
| #' | |||
| #' Runs `simplify_tweets()` on the tweets stored in `file` and saves the | |||
| #' result to `output` with `save_tweets()`. | |||
| #' | |||
| #' @param file Path of the RDS file of stored tweets. | |||
| #' @param fields Passed to `simplify_tweets()` as `.fields`. | |||
| #' @param output Path for the simplified tweets. When `NULL`, it is derived | |||
| #'   from `file` with `path_add(file, append = "_simplified")`. | |||
| #' @param ... Not used by this function. | |||
| #' @return The value returned by `save_tweets()`. | |||
| #' @export | |||
| gathertweet_simplify <- function( | |||
|   file = "tweets.rds", | |||
|   fields = NULL, | |||
|   output = NULL, | |||
|   ... | |||
| ) { | |||
|   logger("Simplifying tweets in {file}") | |||
|   if (!file.exists(file)) { | |||
|     log_fatal("`{file}` does not exist") | |||
|   } | |||
|   tweets_simplified <- simplify_tweets( | |||
|     tweets = NULL, | |||
|     file = file, | |||
|     .fields = fields | |||
|   ) | |||
|   log_debug("Simplified {nrow(tweets_simplified)} tweets") | |||
|   if (is.null(output)) { | |||
|     # path_add() is an internal helper of this package, so it is called | |||
|     # directly rather than through `gathertweet:::`. | |||
|     output <- path_add(file, append = "_simplified") | |||
|   } | |||
|   log_info("Saving simplified tweets to {output}") | |||
|   save_tweets(tweets_simplified, output) | |||
| } | |||
| @@ -5,6 +5,8 @@ | |||
| futile.logger::flog.layout(gathertweet_layout, name = "gathertweet") | |||
| } | |||
| # Paste the arguments together and collapse the result into one string, | |||
| # using `sep` as the separator between elements. | |||
| collapse <- function(..., sep = ", ") { | |||
|   paste(..., collapse = sep) | |||
| } | |||
| #' @title Logging functions | |||
| #' @export | |||
| logger <- function(..., level = "info", envir = parent.frame()) { | |||
| @@ -7,7 +7,7 @@ output: github_document | |||
| ```{r setup, include = FALSE} | |||
| knitr::opts_chunk$set( | |||
| collapse = TRUE, | |||
| cache = TRUE, | |||
| cache = FALSE, | |||
| comment = "", | |||
| prompt = TRUE, | |||
| fig.path = "man/figures/README-", | |||
| @@ -85,50 +85,49 @@ Get 100 \#rstats tweets | |||
| ``` bash | |||
| > gathertweet search --n 100 --quiet "#rstats" | |||
| [2019-05-04 14:52:15] [INFO] ---- gathertweet search start ---- | |||
| [2019-05-04 14:52:15] [INFO] Searching for "#rstats" | |||
| [2019-05-04 14:52:16] [INFO] Gathered 100 tweets | |||
| [2019-05-04 14:52:16] [INFO] Total of 100 tweets in tweets.rds | |||
| [2019-05-04 14:52:16] [INFO] ---- gathertweet search complete ---- | |||
| INFO [2019-05-06 21:56:27] ---- gathertweet search start ---- | |||
| INFO [2019-05-06 21:56:27] Searching for "#rstats" | |||
| INFO [2019-05-06 21:56:28] Gathered 98 tweets | |||
| INFO [2019-05-06 21:56:28] Total of 98 tweets in tweets.rds | |||
| INFO [2019-05-06 21:56:28] ---- gathertweet search complete ---- | |||
| ``` | |||
| Get more tweets, automatically starting from end of the last search | |||
| ``` bash | |||
| > gathertweet search --n 100 --quiet "#rstats" | |||
| [2019-05-04 14:53:17] [INFO] ---- gathertweet search start ---- | |||
| [2019-05-04 14:53:17] [INFO] Searching for "#rstats" | |||
| [2019-05-04 14:53:17] [INFO] Tweets from 1124748486971359232 | |||
| [2019-05-04 14:53:17] [INFO] Gathered 1 tweets | |||
| [2019-05-04 14:53:17] [INFO] Total of 100 tweets in tweets.rds | |||
| [2019-05-04 14:53:17] [INFO] ---- gathertweet search complete ---- | |||
| INFO [2019-05-06 21:57:29] ---- gathertweet search start ---- | |||
| INFO [2019-05-06 21:57:29] Searching for "#rstats" | |||
| INFO [2019-05-06 21:57:29] Tweets from 1125579895403352064 | |||
| INFO [2019-05-06 21:57:29] No new tweets. | |||
| ``` | |||
| Update the stored data about those \#rstats tweets | |||
| ``` bash | |||
| > gathertweet update | |||
| [2019-05-04 14:53:18] [INFO] ---- gathertweet update start ---- | |||
| [2019-05-04 14:53:18] [INFO] Updating tweets in tweets.rds | |||
| [2019-05-04 14:53:18] [INFO] Getting 100 tweets | |||
| [2019-05-04 14:53:19] [INFO] ---- gathertweet update complete ---- | |||
| INFO [2019-05-06 21:57:30] ---- gathertweet update start ---- | |||
| INFO [2019-05-06 21:57:30] Updating tweets in tweets.rds | |||
| INFO [2019-05-06 21:57:30] Getting 98 tweets | |||
| INFO [2019-05-06 21:57:31] ---- gathertweet update complete ---- | |||
| ``` | |||
| ``` bash | |||
| > ls -lh | |||
| total 40K | |||
| -rw-rw-r-- 1 garrick garrick 39K May 4 14:53 tweets.rds | |||
| -rw-rw-r-- 1 garrick garrick 39K May 6 21:57 tweets.rds | |||
| ``` | |||
| Gather user timelines | |||
| ``` bash | |||
| > gathertweet timeline hadleywickham jennybryan dataandme | |||
| [2019-05-04 21:11:54] [INFO] ---- gathertweet timeline start ---- | |||
| [2019-05-04 21:11:54] [INFO] Gathering tweets by hadleywickham, jennybryan, dataandme | |||
| [2019-05-04 21:12:23] [INFO] Gathered 7368 tweets from 3 users | |||
| [2019-05-04 21:12:23] [INFO] Total of 7368 tweets in tweets.rds | |||
| [2019-05-04 21:12:23] [INFO] ---- gathertweet timeline complete ---- | |||
| INFO [2019-05-06 21:57:32] ---- gathertweet timeline start ---- | |||
| INFO [2019-05-06 21:57:32] Gathering tweets by hadleywickham, jennybryan, dataandme | |||
| WARN [2019-05-06 21:57:32] Twitter API for timelines returns a maximum of 3200 tweets per user | |||
| INFO [2019-05-06 21:58:01] Gathered 7427 tweets from 3 users | |||
| INFO [2019-05-06 21:58:02] Total of 7524 tweets in tweets.rds | |||
| INFO [2019-05-06 21:58:02] ---- gathertweet timeline complete ---- | |||
| ``` | |||
| ### Schedule tweet gathering using cron | |||
| @@ -157,52 +156,58 @@ crontab -e | |||
| Usage: | |||
| gathertweet search [--file=<file>] [options] [--] <terms>... | |||
| gathertweet timeline [options] [--] <users>... | |||
| gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args] | |||
| gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>] | |||
| gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | |||
| Arguments | |||
| <terms> Search terms. Individual search terms are queried separately, | |||
| but duplicated tweets are removed from the stored results. | |||
| Each search term counts against the 15 minute rate limit of 180 | |||
| searches, which can be avoided by manually joining search terms | |||
| into a single query. WARNING: Wrap queries with spaces in | |||
| 'single quotes': double quotes are allowed inside single quotes only. | |||
| <fields> Tweet fields that should be included. Default value will include | |||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||
| Options: | |||
| -h --help Show this screen. | |||
| --file <file> Name of RDS file where tweets are stored [default: tweets.rds] | |||
| --file <file> Name of RDS file where tweets are stored | |||
| [default: tweets.rds] | |||
| --no-parse Disable parsing of the results | |||
| --token <token> See {rtweet} for more information | |||
| --retryonratelimit Wait and retry when rate limited (only relevant when n exceeds 18000 tweets) | |||
| --quiet Disable printing of {rtweet} processing/retrieval messages | |||
| --retryonratelimit Wait and retry when rate limited (only relevant when n | |||
| exceeds 18000 tweets) | |||
| --quiet Disable printing of {rtweet} processing messages | |||
| --polite Only allow one process (search|update) to run at a time | |||
| --backup Create a backup of existing tweet file before writing any new files | |||
| --backup-dir <dir> Location for backups, use "" for current directory. [default: backups] | |||
| --debug-args Print values of the arguments only | |||
| --and-simplify Create additional simplified tweet set with default values. | |||
| --backup Create a backup of existing tweet file | |||
| --backup-dir <dir> Location for backups [default: backups] | |||
| --debug-args Debug input arguments | |||
| --and-simplify Create additional simplified tweet set. | |||
| Run `gathertweet simplify` manually for more control. | |||
| search: | |||
| <terms> Search terms. Individual search terms are queried separately, | |||
| but duplicated tweets are removed from the stored results. | |||
| Each search term counts against the 15 minute rate limit of 180 | |||
| searches, which can be avoided by manually joining search terms | |||
| into a single query. NOTE: Wrap queries with spaces in | |||
| 'single quotes': only use double quotes within single quotes. | |||
| --type <type> Type of search results: "recent", "mixed", or "popular" | |||
| [default: recent] | |||
| --geocode <geocode> Geographical limiter of the template | |||
| "latitude,longitude,radius" | |||
| --since_id <since_id> Return results with an ID greater than (newer than) or | |||
| equal to since_id, automatically extracted from the | |||
| existing tweets <file>, if it exists, and ignored when | |||
| <max_id> is set. Use "none" for all available tweets, | |||
| or "last" for the maximum seen status_id in existing | |||
| tweets. [default: last] | |||
| search and timeline: | |||
| -n, --n <n> Number of tweets to return [default: 18000] | |||
| --include_rts Logical indicating whether retweets should be included | |||
| --max_id <max_id> Return results with an ID less than (older than) or equal to max_id | |||
| search: | |||
| --type <type> Type of search results: "recent", "mixed", or "popular". [default: recent] | |||
| --geocode <geocode> Geographical limiter of the template "latitude,longitude,radius" | |||
| --since_id <since_id> Return results with an ID greater than (newer than) or equal to since_id, | |||
| automatically extracted from the existing tweets <file>, if it exists, and | |||
| ignored when <max_id> is set. Use "none" for all available tweets, | |||
| or "last" for the maximum seen status_id in existing tweets. [default: last] | |||
| -n, --n <n> Number of tweets to return [default: 18000] | |||
| --include_rts Logical indicating whether retweets should be included | |||
| (default is to exclude RTs) | |||
| --max_id <max_id> Return tweets with an ID less (older) than or equal to max_id | |||
| timeline: | |||
| --home If included, returns home-timeline instead of user-timeline. | |||
| <users> A list of users as user names, IDs, or a mixture of both, | |||
| separated by spaces. | |||
| --home If included, returns home-timeline instead of user-timeline. | |||
| simplify: | |||
| --output <output> Output file, default is input file with `_simplified` appended to name. | |||
| <fields> Tweet fields that should be included. By default includes: | |||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||
| --output <output> Output file, default is input file with `_simplified` | |||
| appended to name. | |||
| @@ -1,60 +1,66 @@ | |||
| #! /usr/bin/env Rscript | |||
| # Usage ------------------------------------------------------------------- | |||
| # Usage ----------------------------------------------------------------------- | |||
| 'Gather tweets from the command line | |||
| Usage: | |||
| gathertweet search [--file=<file>] [options] [--] <terms>... | |||
| gathertweet timeline [options] [--] <users>... | |||
| gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args] | |||
| gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>] | |||
| gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | |||
| Arguments | |||
| <terms> Search terms. Individual search terms are queried separately, | |||
| but duplicated tweets are removed from the stored results. | |||
| Each search term counts against the 15 minute rate limit of 180 | |||
| searches, which can be avoided by manually joining search terms | |||
| into a single query. WARNING: Wrap queries with spaces in | |||
| \'single quotes\': double quotes are allowed inside single quotes only. | |||
| <fields> Tweet fields that should be included. Default value will include | |||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||
| Options: | |||
| -h --help Show this screen. | |||
| --file <file> Name of RDS file where tweets are stored [default: tweets.rds] | |||
| --file <file> Name of RDS file where tweets are stored | |||
| [default: tweets.rds] | |||
| --no-parse Disable parsing of the results | |||
| --token <token> See {rtweet} for more information | |||
| --retryonratelimit Wait and retry when rate limited (only relevant when n exceeds 18000 tweets) | |||
| --quiet Disable printing of {rtweet} processing/retrieval messages | |||
| --retryonratelimit Wait and retry when rate limited (only relevant when n | |||
| exceeds 18000 tweets) | |||
| --quiet Disable printing of {rtweet} processing messages | |||
| --polite Only allow one process (search|update) to run at a time | |||
| --backup Create a backup of existing tweet file before writing any new files | |||
| --backup-dir <dir> Location for backups, use "" for current directory. [default: backups] | |||
| --debug-args Print values of the arguments only | |||
| --and-simplify Create additional simplified tweet set with default values. | |||
| --backup Create a backup of existing tweet file | |||
| --backup-dir <dir> Location for backups [default: backups] | |||
| --debug-args Debug input arguments | |||
| --and-simplify Create additional simplified tweet set. | |||
| Run `gathertweet simplify` manually for more control. | |||
| search: | |||
| <terms> Search terms. Individual search terms are queried separately, | |||
| but duplicated tweets are removed from the stored results. | |||
| Each search term counts against the 15 minute rate limit of 180 | |||
| searches, which can be avoided by manually joining search terms | |||
| into a single query. NOTE: Wrap queries with spaces in | |||
| \'single quotes\': only use double quotes within single quotes. | |||
| --type <type> Type of search results: "recent", "mixed", or "popular" | |||
| [default: recent] | |||
| --geocode <geocode> Geographical limiter of the template | |||
| "latitude,longitude,radius" | |||
| --since_id <since_id> Return results with an ID greater than (newer than) or | |||
| equal to since_id, automatically extracted from the | |||
| existing tweets <file>, if it exists, and ignored when | |||
| <max_id> is set. Use "none" for all available tweets, | |||
| or "last" for the maximum seen status_id in existing | |||
| tweets. [default: last] | |||
| search and timeline: | |||
| -n, --n <n> Number of tweets to return [default: 18000] | |||
| --include_rts Logical indicating whether retweets should be included | |||
| --max_id <max_id> Return results with an ID less than (older than) or equal to max_id | |||
| search: | |||
| --type <type> Type of search results: "recent", "mixed", or "popular". [default: recent] | |||
| --geocode <geocode> Geographical limiter of the template "latitude,longitude,radius" | |||
| --since_id <since_id> Return results with an ID greater than (newer than) or equal to since_id, | |||
| automatically extracted from the existing tweets <file>, if it exists, and | |||
| ignored when <max_id> is set. Use "none" for all available tweets, | |||
| or "last" for the maximum seen status_id in existing tweets. [default: last] | |||
| -n, --n <n> Number of tweets to return [default: 18000] | |||
| --include_rts Logical indicating whether retweets should be included | |||
| (default is to exclude RTs) | |||
| --max_id <max_id> Return tweets with an ID less (older) than or equal to max_id | |||
| timeline: | |||
| --home If included, returns home-timeline instead of user-timeline. | |||
| <users> A list of users as user names, IDs, or a mixture of both, | |||
| separated by spaces. | |||
| --home If included, returns home-timeline instead of user-timeline. | |||
| simplify: | |||
| --output <output> Output file, default is input file with `_simplified` appended to name. | |||
| <fields> Tweet fields that should be included. By default includes: | |||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||
| --output <output> Output file, default is input file with `_simplified` | |||
| appended to name. | |||
| ' -> doc | |||
| library(docopt) | |||
| @@ -67,140 +73,76 @@ if (args$`--debug-args`) { | |||
| exit() | |||
| } | |||
| library(gathertweet) | |||
| collapse <- function(..., sep = ", ") paste(..., collapse = sep) | |||
| do_gathertweet <- function() { | |||
| library(gathertweet) | |||
| collapse <- function(..., sep = ", ") paste(..., collapse = sep) | |||
| # Which action was called? | |||
| valid_actions <- c("search", "update", "simplify", "timeline") | |||
| action <- names(Filter(isTRUE, args[valid_actions])) | |||
| if (!length(action)) { | |||
| log_fatal("Please specify a valid action: {collapse(valid_actions)}") | |||
| } | |||
| # Which action was called? | |||
| valid_actions <- c("search", "update", "simplify", "timeline") | |||
| action <- names(Filter(isTRUE, args[valid_actions])) | |||
| if (!length(action)) { | |||
| log_fatal("Please specify a valid action: {collapse(valid_actions)}") | |||
| } | |||
| if (args$polite) { | |||
| lockfile <- paste0(".gathertweet_", | |||
| digest::digest(args[c("file", "search", "update", "simplify")]), | |||
| ".lock") | |||
| lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0) | |||
| gathertweet:::stopifnot_locked(lck, "Another gathertweet {action} process is currently running for {args$file}") | |||
| } | |||
| if (args$polite) { | |||
| lockfile <- paste0( | |||
| ".gathertweet_", | |||
| digest::digest(args[c("file", "search", "update", "simplify")]), | |||
| ".lock" | |||
| ) | |||
| lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0) | |||
| gathertweet:::stopifnot_locked( | |||
| lck, | |||
| "Another gathertweet {action} process is currently running for {args$file}" | |||
| ) | |||
| on.exit({ | |||
| filelock::unlock(lck) | |||
| unlink(lockfile) | |||
| }) | |||
| } | |||
| log_info("---- gathertweet {action} start ----") | |||
| log_info("---- gathertweet {action} start ----") | |||
| if (isTRUE(args$backup)) { | |||
| backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||
| } | |||
| # Search ------------------------------------------------------------------ | |||
| if (isTRUE(args$search)) { | |||
| # Also simplify if --and-simplify flag is called | |||
| if (args[["--and-simplify"]]) args$simplify <- TRUE | |||
| log_info("Searching for \"{paste0(args$terms, collapse = '\", \"')}\"") | |||
| tweets <- | |||
| # Search ---- | |||
| if (isTRUE(args$search)) { | |||
| max_id <- args[["max_id"]] | |||
| since_id <- args[["since_id"]] | |||
| since_id <- if (is.null(max_id)) { | |||
| if (since_id == "last") { | |||
| last_seen_tweet(file = args$file) | |||
| } else if (since_id == "none") { | |||
| NULL | |||
| } else since_id | |||
| } | |||
| if (!is.null(since_id)) log_info("Tweets from {since_id}") | |||
| if (!is.null(max_id)) log_info("Tweets up to {max_id}") | |||
| tweets <- lapply( | |||
| args$term, | |||
| function(term) rtweet::search_tweets( | |||
| q = term, | |||
| n = as.integer(args$n), | |||
| type = args$type, | |||
| include_rts = args$include_rts, | |||
| geocode = args$geocode, | |||
| max_id = max_id, | |||
| parse = !args[["no-parse"]], | |||
| token = args$token, | |||
| retryonratelimit = args$retryonratelimit, | |||
| verbose = !args$quiet, | |||
| since_id = since_id | |||
| ) | |||
| ) | |||
| do.call("gathertweet_search", args) | |||
| tweets <- dplyr::bind_rows(tweets) | |||
| # Update ---- | |||
| } else if (isTRUE(args$update)) { | |||
| if (nrow(tweets) == 0) { | |||
| log_info("No new tweets.") | |||
| exit() | |||
| } | |||
| do.call("gathertweet_update", args) | |||
| tweets <- tweets[!duplicated(tweets$status_id), ] | |||
| tweets <- tweets[order(tweets$status_id), ] | |||
| log_info("Gathered {nrow(tweets)} tweets") | |||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||
| tweets <- save_tweets(tweets, args$file) | |||
| log_info("Total of {nrow(tweets)} tweets in {args$file}") | |||
| # Update ------------------------------------------------------------------ | |||
| } else if (isTRUE(args$update)) { | |||
| logger("Updating tweets in {args$file}") | |||
| tweets <- update_tweets( | |||
| file = args$file, | |||
| # passed to rtweet::lookup_statuses() | |||
| parse = !args[["no-parse"]], | |||
| token = args$token | |||
| ) | |||
| log_debug("Status lookup returned {nrow(tweets)} tweets") | |||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||
| tweets <- save_tweets(tweets, args$file) | |||
| log_debug("Total of {nrow(tweets)} tweets in {args$file}") | |||
| } else if (isTRUE(args$timeline)) { | |||
| if (!length(args$users)) { | |||
| stop("Please provide a list of users as user names, user IDs, or a mixture of both.") | |||
| } | |||
| # Timeline ---- | |||
| } else if (isTRUE(args$timeline)) { | |||
| if (!length(args$users)) { | |||
| stop("Please provide a list of users as user names, user IDs, ", | |||
| "or a mixture of both.") | |||
| } | |||
| log_info("Gathering tweets by {collapse(args$users)}") | |||
| if (args[["--and-simplify"]]) args$simplify <- TRUE | |||
| do.call("gathertweet_timeline", args) | |||
| } | |||
| tweets <- rtweet::get_timeline( | |||
| user = args[["users"]], | |||
| n = min(as.integer(args[["n"]]), 3200), | |||
| max_id = args[["max_id"]], | |||
| home = isTRUE(args[["home"]]), | |||
| parse = isFALSE(args[["no-parse"]]), | |||
| check = TRUE, | |||
| token = args$token, | |||
| include_rts = isTRUE(args[["include-rts"]]) | |||
| ) | |||
| tweets <- tweets[!duplicated(tweets$status_id), ] | |||
| tweets <- tweets[order(tweets$status_id), ] | |||
| log_info("Gathered {nrow(tweets)} tweets from {length(args$users)} users") | |||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||
| tweets <- save_tweets(tweets, args$file) | |||
| log_info("Total of {nrow(tweets)} tweets in {args$file}") | |||
| } | |||
| # Simplify ---------------------------------------------------------------- | |||
| if (isTRUE(args$simplify)) { | |||
| do.call("gathertweet_simplify", args) | |||
| } | |||
| # Simplify ---------------------------------------------------------------- | |||
| if (isTRUE(args$simplify)) { | |||
| logger("Simplifying tweets in {args$file}") | |||
| tweets_simplified <- simplify_tweets( | |||
| tweets = NULL, | |||
| file = args$file, | |||
| .fields = args$fields | |||
| ) | |||
| log_debug("Simplified {nrow(tweets_simplified)} tweets") | |||
| if (is.null(args$output)) { | |||
| args$output <- gathertweet:::path_add(args$file, append = "_simplified") | |||
| if (args$polite) { | |||
| filelock::unlock(lck) | |||
| unlink(lockfile) | |||
| } | |||
| log_info("Saving simplified tweets to {args$output}") | |||
| tweets_simplfied <- save_tweets(tweets_simplified, args$output) | |||
| } | |||
| if (args$polite) { | |||
| filelock::unlock(lck) | |||
| unlink(lockfile) | |||
| log_info("---- gathertweet {action} complete ----") | |||
| } | |||
| log_info("---- gathertweet {action} complete ----") | |||
| do_gathertweet() | |||
| @@ -0,0 +1,14 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/gathertweet_actions.R | |||
| \name{gathertweet_search} | |||
| \alias{gathertweet_search} | |||
| \title{gathertweet actions} | |||
| \usage{ | |||
| gathertweet_search(terms, file = "tweets.rds", n = 18000, | |||
| max_id = NULL, since_id = "last", type = "recent", | |||
| include_rts = FALSE, geocode = NULL, `no-parse` = FALSE, | |||
| token = NULL, retryonratelimit = FALSE, quiet = FALSE, ...) | |||
| } | |||
| \description{ | |||
| gathertweet actions | |||
| } | |||