| export("%>%") | export("%>%") | ||||
| export(backup_tweets) | export(backup_tweets) | ||||
| export(gathertweet_search) | |||||
| export(gathertweet_simplify) | |||||
| export(gathertweet_timeline) | |||||
| export(gathertweet_update) | |||||
| export(get_user_info) | export(get_user_info) | ||||
| export(install_gathertweet) | export(install_gathertweet) | ||||
| export(last_seen_tweet) | export(last_seen_tweet) |
#' @title gathertweet actions
#'
#' @description `gathertweet_search()` queries every search term separately
#'   with `rtweet::search_tweets()`, removes duplicated tweets and stores the
#'   results in `file`.
#'
#' @param terms Search terms. Individual search terms are queried separately,
#'   but duplicated tweets are removed from the stored results.
#' @param file Name of the RDS file where tweets are stored.
#' @param n Number of tweets to return; coerced with `as.integer()`.
#' @param max_id Return results with an ID less than (older than) or equal to
#'   `max_id`. When set, `since_id` is ignored.
#' @param since_id Return results with an ID greater than (newer than) or
#'   equal to `since_id`. Use `"last"` to take the value returned by
#'   `last_seen_tweet(file = file)`, or `"none"` (or `NULL`) for no lower
#'   limit.
#' @param type Type of search results: `"recent"`, `"mixed"`, or `"popular"`.
#' @param include_rts Logical indicating whether retweets should be included.
#' @param geocode Geographical limiter of the template
#'   `"latitude,longitude,radius"`.
#' @param no-parse If `TRUE`, parsing of the results is disabled and the
#'   un-parsed results are written to `file` with `saveRDS()`.
#' @param token Passed to `rtweet::search_tweets()`; see \{rtweet\} for more
#'   information.
#' @param retryonratelimit Wait and retry when rate limited.
#' @param quiet If `TRUE`, `verbose = FALSE` is passed to
#'   `rtweet::search_tweets()`.
#' @param ... Not used by this function; extra arguments are accepted and
#'   dropped.
#'
#' @return The list of un-parsed results when `no-parse` is `TRUE`, otherwise
#'   the value returned by `save_tweets()`. When the parsed search returns no
#'   rows, `exit()` is called after logging "No new tweets.".
#' @export
gathertweet_search <- function(
  terms,
  file = "tweets.rds",
  n = 18000,
  max_id = NULL,
  since_id = "last",
  type = "recent",
  include_rts = FALSE,
  geocode = NULL,
  `no-parse` = FALSE,
  token = NULL,
  retryonratelimit = FALSE,
  quiet = FALSE,
  ...
) {
  log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"")

  # Resolve the lower bound. `since_id` is dropped whenever `max_id` is given,
  # and a NULL `since_id` is treated like "none" instead of failing on a
  # zero-length comparison.
  since_id <- if (!is.null(max_id) || is.null(since_id)) {
    NULL
  } else if (identical(since_id, "last")) {
    last_seen_tweet(file = file)
  } else if (identical(since_id, "none")) {
    NULL
  } else {
    since_id
  }

  if (!is.null(since_id)) log_info("Tweets from {since_id}")
  if (!is.null(max_id)) log_info("Tweets up to {max_id}")

  # One request per search term; results are combined below.
  tweets <- lapply(
    terms,
    function(term) rtweet::search_tweets(
      q = term,
      n = as.integer(n),
      type = type,
      include_rts = include_rts,
      geocode = geocode,
      max_id = max_id,
      parse = isFALSE(`no-parse`),
      token = token,
      retryonratelimit = retryonratelimit,
      verbose = isFALSE(quiet),
      since_id = since_id
    )
  )

  if (isTRUE(`no-parse`)) {
    # Un-parsed results are written as-is, replacing the contents of `file`.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  } else {
    tweets <- dplyr::bind_rows(tweets)
    if (nrow(tweets) == 0) {
      log_info("No new tweets.")
      exit()
    }
    # The same tweet can match several terms: keep one copy, ordered by id.
    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]
    log_info("Gathered {nrow(tweets)} tweets")
    tweets <- save_tweets(tweets, file)
    log_info("Total of {nrow(tweets)} tweets in {file}")
  }

  tweets
}
#' Update stored tweets
#'
#' Looks up the tweets stored in `file` again with `update_tweets()` and
#' writes the result back with `save_tweets()`.
#'
#' @param file Name of the RDS file where tweets are stored. `log_fatal()` is
#'   called when the file does not exist.
#' @param no-parse If `TRUE`, `parse = FALSE` is passed to `update_tweets()`.
#' @param token Passed to `update_tweets()`.
#' @param ... Not used by this function; extra arguments are accepted and
#'   dropped.
#'
#' @return The value returned by `save_tweets()`.
#' @export
gathertweet_update <- function(
  file = "tweets.rds",
  `no-parse` = FALSE,
  token = NULL,
  ...
) {
  logger("Updating tweets in {file}")

  file_missing <- !file.exists(file)
  if (file_missing) log_fatal("`{file}` does not exist")

  # `parse` and `token` are passed to rtweet::lookup_statuses()
  parse_results <- isFALSE(`no-parse`)
  tweets <- update_tweets(file = file, parse = parse_results, token = token)
  log_debug("Status lookup returned {nrow(tweets)} tweets")

  tweets <- save_tweets(tweets, file)
  log_debug("Total of {nrow(tweets)} tweets in {file}")

  tweets
}
#' Gather user timelines
#'
#' Gathers tweets from the timelines of `users` with `rtweet::get_timeline()`,
#' removes duplicated tweets and stores the result in `file` with
#' `save_tweets()`.
#'
#' @param users A list of users as user names, IDs, or a mixture of both.
#' @param file Name of the RDS file where tweets are stored.
#' @param n Number of tweets to return; coerced with `as.integer()`. Values
#'   above 3200 trigger a warning and are reduced to 3200.
#' @param max_id Return results with an ID less than (older than) or equal to
#'   `max_id`.
#' @param home If `TRUE`, returns home-timeline instead of user-timeline.
#'   NOTE(review): the default is `TRUE` here, whereas the command line
#'   `--home` flag is opt-in; confirm which default is intended.
#' @param no-parse If `TRUE`, `parse = FALSE` is passed to
#'   `rtweet::get_timeline()`.
#' @param token Passed to `rtweet::get_timeline()`; see \{rtweet\} for more
#'   information.
#' @param include_rts Logical indicating whether retweets should be included.
#' @param ... Not used by this function; extra arguments are accepted and
#'   dropped.
#'
#' @return The value returned by `save_tweets()`.
#' @export
gathertweet_timeline <- function(
  users,
  file = "tweets.rds",
  n = 3200,
  max_id = NULL,
  home = TRUE,
  `no-parse` = FALSE,
  token = NULL,
  include_rts = FALSE,
  ...
) {
  log_info("Gathering tweets by {collapse(users)}")

  n <- as.integer(n)
  if (n > 3200L) {
    log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user")
    # Request what the warning says is available instead of forwarding a
    # value above the per-user limit.
    n <- 3200L
  }

  tweets <- rtweet::get_timeline(
    user = users,
    n = n,
    max_id = max_id,
    home = isTRUE(home),
    parse = isFALSE(`no-parse`),
    check = TRUE,
    token = token,
    include_rts = isTRUE(include_rts)
  )

  # Keep one copy of every status, ordered by id.
  tweets <- tweets[!duplicated(tweets$status_id), ]
  tweets <- tweets[order(tweets$status_id), ]
  log_info("Gathered {nrow(tweets)} tweets from {length(users)} users")

  tweets <- save_tweets(tweets, file)
  log_info("Total of {nrow(tweets)} tweets in {file}")

  tweets
}
#' Simplify stored tweets
#'
#' Reads the tweets stored in `file`, reduces them with `simplify_tweets()`
#' and saves the simplified set with `save_tweets()`.
#'
#' @param file Name of the RDS file where tweets are stored. `log_fatal()` is
#'   called when the file does not exist.
#' @param fields Tweet fields that should be included; passed to
#'   `simplify_tweets()` as `.fields`.
#' @param output Output file. When `NULL`, the name is built by
#'   `path_add(file, append = "_simplified")`.
#' @param ... Not used by this function; extra arguments are accepted and
#'   dropped.
#'
#' @return The value returned by `save_tweets()`.
#' @export
gathertweet_simplify <- function(
  file = "tweets.rds",
  fields = NULL,
  output = NULL,
  ...
) {
  logger("Simplifying tweets in {file}")
  if (!file.exists(file)) {
    log_fatal("`{file}` does not exist")
  }

  tweets_simplified <- simplify_tweets(
    tweets = NULL,
    file = file,
    .fields = fields
  )
  log_debug("Simplified {nrow(tweets_simplified)} tweets")

  if (is.null(output)) {
    # `path_add()` is an internal helper of this package, so it is called
    # directly rather than through `gathertweet:::`.
    output <- path_add(file, append = "_simplified")
  }

  log_info("Saving simplified tweets to {output}")
  save_tweets(tweets_simplified, output)
}
| futile.logger::flog.layout(gathertweet_layout, name = "gathertweet") | futile.logger::flog.layout(gathertweet_layout, name = "gathertweet") | ||||
| } | } | ||||
# Paste the inputs together, joining the elements of the result with `sep`.
collapse <- function(..., sep = ", ") {
  paste(..., collapse = sep)
}
| #' @title Logging functions | #' @title Logging functions | ||||
| #' @export | #' @export | ||||
| logger <- function(..., level = "info", envir = parent.frame()) { | logger <- function(..., level = "info", envir = parent.frame()) { |
| ```{r setup, include = FALSE} | ```{r setup, include = FALSE} | ||||
| knitr::opts_chunk$set( | knitr::opts_chunk$set( | ||||
| collapse = TRUE, | collapse = TRUE, | ||||
| cache = TRUE, | |||||
| cache = FALSE, | |||||
| comment = "", | comment = "", | ||||
| prompt = TRUE, | prompt = TRUE, | ||||
| fig.path = "man/figures/README-", | fig.path = "man/figures/README-", |
| ``` bash | ``` bash | ||||
| > gathertweet search --n 100 --quiet "#rstats" | > gathertweet search --n 100 --quiet "#rstats" | ||||
| [2019-05-04 14:52:15] [INFO] ---- gathertweet search start ---- | |||||
| [2019-05-04 14:52:15] [INFO] Searching for "#rstats" | |||||
| [2019-05-04 14:52:16] [INFO] Gathered 100 tweets | |||||
| [2019-05-04 14:52:16] [INFO] Total of 100 tweets in tweets.rds | |||||
| [2019-05-04 14:52:16] [INFO] ---- gathertweet search complete ---- | |||||
| INFO [2019-05-06 21:56:27] ---- gathertweet search start ---- | |||||
| INFO [2019-05-06 21:56:27] Searching for "#rstats" | |||||
| INFO [2019-05-06 21:56:28] Gathered 98 tweets | |||||
| INFO [2019-05-06 21:56:28] Total of 98 tweets in tweets.rds | |||||
| INFO [2019-05-06 21:56:28] ---- gathertweet search complete ---- | |||||
| ``` | ``` | ||||
| Get more tweets, automatically starting from end of the last search | Get more tweets, automatically starting from end of the last search | ||||
| ``` bash | ``` bash | ||||
| > gathertweet search --n 100 --quiet "#rstats" | > gathertweet search --n 100 --quiet "#rstats" | ||||
| [2019-05-04 14:53:17] [INFO] ---- gathertweet search start ---- | |||||
| [2019-05-04 14:53:17] [INFO] Searching for "#rstats" | |||||
| [2019-05-04 14:53:17] [INFO] Tweets from 1124748486971359232 | |||||
| [2019-05-04 14:53:17] [INFO] Gathered 1 tweets | |||||
| [2019-05-04 14:53:17] [INFO] Total of 100 tweets in tweets.rds | |||||
| [2019-05-04 14:53:17] [INFO] ---- gathertweet search complete ---- | |||||
| INFO [2019-05-06 21:57:29] ---- gathertweet search start ---- | |||||
| INFO [2019-05-06 21:57:29] Searching for "#rstats" | |||||
| INFO [2019-05-06 21:57:29] Tweets from 1125579895403352064 | |||||
| INFO [2019-05-06 21:57:29] No new tweets. | |||||
| ``` | ``` | ||||
| Update the stored data about those \#rstats tweets | Update the stored data about those \#rstats tweets | ||||
| ``` bash | ``` bash | ||||
| > gathertweet update | > gathertweet update | ||||
| [2019-05-04 14:53:18] [INFO] ---- gathertweet update start ---- | |||||
| [2019-05-04 14:53:18] [INFO] Updating tweets in tweets.rds | |||||
| [2019-05-04 14:53:18] [INFO] Getting 100 tweets | |||||
| [2019-05-04 14:53:19] [INFO] ---- gathertweet update complete ---- | |||||
| INFO [2019-05-06 21:57:30] ---- gathertweet update start ---- | |||||
| INFO [2019-05-06 21:57:30] Updating tweets in tweets.rds | |||||
| INFO [2019-05-06 21:57:30] Getting 98 tweets | |||||
| INFO [2019-05-06 21:57:31] ---- gathertweet update complete ---- | |||||
| ``` | ``` | ||||
| ``` bash | ``` bash | ||||
| > ls -lh | > ls -lh | ||||
| total 40K | total 40K | ||||
| -rw-rw-r-- 1 garrick garrick 39K May 4 14:53 tweets.rds | |||||
| -rw-rw-r-- 1 garrick garrick 39K May 6 21:57 tweets.rds | |||||
| ``` | ``` | ||||
| Gather user timelines | Gather user timelines | ||||
| ``` bash | ``` bash | ||||
| > gathertweet timeline hadleywickham jennybryan dataandme | > gathertweet timeline hadleywickham jennybryan dataandme | ||||
| [2019-05-04 21:11:54] [INFO] ---- gathertweet timeline start ---- | |||||
| [2019-05-04 21:11:54] [INFO] Gathering tweets by hadleywickham, jennybryan, dataandme | |||||
| [2019-05-04 21:12:23] [INFO] Gathered 7368 tweets from 3 users | |||||
| [2019-05-04 21:12:23] [INFO] Total of 7368 tweets in tweets.rds | |||||
| [2019-05-04 21:12:23] [INFO] ---- gathertweet timeline complete ---- | |||||
| INFO [2019-05-06 21:57:32] ---- gathertweet timeline start ---- | |||||
| INFO [2019-05-06 21:57:32] Gathering tweets by hadleywickham, jennybryan, dataandme | |||||
| WARN [2019-05-06 21:57:32] Twitter API for timelines returns a maximum of 3200 tweets per user | |||||
| INFO [2019-05-06 21:58:01] Gathered 7427 tweets from 3 users | |||||
| INFO [2019-05-06 21:58:02] Total of 7524 tweets in tweets.rds | |||||
| INFO [2019-05-06 21:58:02] ---- gathertweet timeline complete ---- | |||||
| ``` | ``` | ||||
| ### Schedule tweet gathering using cron | ### Schedule tweet gathering using cron | ||||
| Usage: | Usage: | ||||
| gathertweet search [--file=<file>] [options] [--] <terms>... | gathertweet search [--file=<file>] [options] [--] <terms>... | ||||
| gathertweet timeline [options] [--] <users>... | gathertweet timeline [options] [--] <users>... | ||||
| gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args] | |||||
| gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>] | |||||
| gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | ||||
| Arguments | |||||
| <terms> Search terms. Individual search terms are queried separately, | |||||
| but duplicated tweets are removed from the stored results. | |||||
| Each search term counts against the 15 minute rate limit of 180 | |||||
| searches, which can be avoided by manually joining search terms | |||||
| into a single query. WARNING: Wrap queries with spaces in | |||||
| 'single quotes': double quotes are allowed inside single quotes only. | |||||
| <fields> Tweet fields that should be included. Default value will include | |||||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||||
| Options: | Options: | ||||
| -h --help Show this screen. | -h --help Show this screen. | ||||
| --file <file> Name of RDS file where tweets are stored [default: tweets.rds] | |||||
| --file <file> Name of RDS file where tweets are stored | |||||
| [default: tweets.rds] | |||||
| --no-parse Disable parsing of the results | --no-parse Disable parsing of the results | ||||
| --token <token> See {rtweet} for more information | --token <token> See {rtweet} for more information | ||||
| --retryonratelimit Wait and retry when rate limited (only relevant when n exceeds 18000 tweets) | |||||
| --quiet Disable printing of {rtweet} processing/retrieval messages | |||||
| --retryonratelimit Wait and retry when rate limited (only relevant when n | |||||
| exceeds 18000 tweets) | |||||
| --quiet Disable printing of {rtweet} processing messages | |||||
| --polite Only allow one process (search|update) to run at a time | --polite Only allow one process (search|update) to run at a time | ||||
| --backup Create a backup of existing tweet file before writing any new files | |||||
| --backup-dir <dir> Location for backups, use "" for current directory. [default: backups] | |||||
| --debug-args Print values of the arguments only | |||||
| --and-simplify Create additional simplified tweet set with default values. | |||||
| --backup Create a backup of existing tweet file | |||||
| --backup-dir <dir> Location for backups [default: backups] | |||||
| --debug-args Debug input arguments | |||||
| --and-simplify Create additional simplified tweet set. | |||||
| Run `gathertweet simplify` manually for more control. | Run `gathertweet simplify` manually for more control. | ||||
| search: | |||||
| <terms> Search terms. Individual search terms are queried separately, | |||||
| but duplicated tweets are removed from the stored results. | |||||
| Each search term counts against the 15 minute rate limit of 180 | |||||
| searches, which can be avoided by manually joining search terms | |||||
| into a single query. NOTE: Wrap queries with spaces in | |||||
| 'single quotes': only use double quotes within single quotes. | |||||
| --type <type> Type of search results: "recent", "mixed", or "popular" | |||||
| [default: recent] | |||||
| --geocode <geocode> Geographical limiter of the template | |||||
| "latitude,longitude,radius" | |||||
| --since_id <since_id> Return results with an ID greater than (newer than) or | |||||
| equal to since_id, automatically extracted from the | |||||
| existing tweets <file>, if it exists, and ignored when | |||||
| <max_id> is set. Use "none" for all available tweets, | |||||
| or "last" for the maximum seen status_id in existing | |||||
| tweets. [default: last] | |||||
| search and timeline: | search and timeline: | ||||
| -n, --n <n> Number of tweets to return [default: 18000] | |||||
| --include_rts Logical indicating whether retweets should be included | |||||
| --max_id <max_id> Return results with an ID less than (older than) or equal to max_id | |||||
| search: | |||||
| --type <type> Type of search results: "recent", "mixed", or "popular". [default: recent] | |||||
| --geocode <geocode> Geographical limiter of the template "latitude,longitude,radius" | |||||
| --since_id <since_id> Return results with an ID greater than (newer than) or equal to since_id, | |||||
| automatically extracted from the existing tweets <file>, if it exists, and | |||||
| ignored when <max_id> is set. Use "none" for all available tweets, | |||||
| or "last" for the maximum seen status_id in existing tweets. [default: last] | |||||
| -n, --n <n> Number of tweets to return [default: 18000] | |||||
| --include_rts Logical indicating whether retweets should be included | |||||
| (default is to exclude RTs) | |||||
| --max_id <max_id> Return tweets with an ID less (older) than or equal to max_id | |||||
| timeline: | timeline: | ||||
| --home If included, returns home-timeline instead of user-timeline. | |||||
| <users> A list of users as user names, IDs, or a mixture of both, | |||||
| separated by spaces. | |||||
| --home If included, returns home-timeline instead of user-timeline. | |||||
| simplify: | simplify: | ||||
| --output <output> Output file, default is input file with `_simplified` appended to name. | |||||
| <fields> Tweet fields that should be included. By default includes: | |||||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||||
| --output <output> Output file, default is input file with `_simplified` | |||||
| appended to name. |
| #! /usr/bin/env Rscript | #! /usr/bin/env Rscript | ||||
| # Usage ------------------------------------------------------------------- | |||||
| # Usage ----------------------------------------------------------------------- | |||||
| 'Gather tweets from the command line | 'Gather tweets from the command line | ||||
| Usage: | Usage: | ||||
| gathertweet search [--file=<file>] [options] [--] <terms>... | gathertweet search [--file=<file>] [options] [--] <terms>... | ||||
| gathertweet timeline [options] [--] <users>... | gathertweet timeline [options] [--] <users>... | ||||
| gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args] | |||||
| gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>] | |||||
| gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...] | ||||
| Arguments | |||||
| <terms> Search terms. Individual search terms are queried separately, | |||||
| but duplicated tweets are removed from the stored results. | |||||
| Each search term counts against the 15 minute rate limit of 180 | |||||
| searches, which can be avoided by manually joining search terms | |||||
| into a single query. WARNING: Wrap queries with spaces in | |||||
| \'single quotes\': double quotes are allowed inside single quotes only. | |||||
| <fields> Tweet fields that should be included. Default value will include | |||||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||||
| Options: | Options: | ||||
| -h --help Show this screen. | -h --help Show this screen. | ||||
| --file <file> Name of RDS file where tweets are stored [default: tweets.rds] | |||||
| --file <file> Name of RDS file where tweets are stored | |||||
| [default: tweets.rds] | |||||
| --no-parse Disable parsing of the results | --no-parse Disable parsing of the results | ||||
| --token <token> See {rtweet} for more information | --token <token> See {rtweet} for more information | ||||
| --retryonratelimit Wait and retry when rate limited (only relevant when n exceeds 18000 tweets) | |||||
| --quiet Disable printing of {rtweet} processing/retrieval messages | |||||
| --retryonratelimit Wait and retry when rate limited (only relevant when n | |||||
| exceeds 18000 tweets) | |||||
| --quiet Disable printing of {rtweet} processing messages | |||||
| --polite Only allow one process (search|update) to run at a time | --polite Only allow one process (search|update) to run at a time | ||||
| --backup Create a backup of existing tweet file before writing any new files | |||||
| --backup-dir <dir> Location for backups, use "" for current directory. [default: backups] | |||||
| --debug-args Print values of the arguments only | |||||
| --and-simplify Create additional simplified tweet set with default values. | |||||
| --backup Create a backup of existing tweet file | |||||
| --backup-dir <dir> Location for backups [default: backups] | |||||
| --debug-args Debug input arguments | |||||
| --and-simplify Create additional simplified tweet set. | |||||
| Run `gathertweet simplify` manually for more control. | Run `gathertweet simplify` manually for more control. | ||||
| search: | |||||
| <terms> Search terms. Individual search terms are queried separately, | |||||
| but duplicated tweets are removed from the stored results. | |||||
| Each search term counts against the 15 minute rate limit of 180 | |||||
| searches, which can be avoided by manually joining search terms | |||||
| into a single query. NOTE: Wrap queries with spaces in | |||||
| \'single quotes\': only use double quotes within single quotes. | |||||
| --type <type> Type of search results: "recent", "mixed", or "popular" | |||||
| [default: recent] | |||||
| --geocode <geocode> Geographical limiter of the template | |||||
| "latitude,longitude,radius" | |||||
| --since_id <since_id> Return results with an ID greather than (newer than) or | |||||
| equal to since_id, automatically extracted from the | |||||
| existing tweets <file>, if it exists, and ignored when | |||||
| <max_id> is set. Use "none" for all available tweets, | |||||
| or "last" for the maximum seen status_id in existing | |||||
| tweets. [default: last] | |||||
| search and timeline: | search and timeline: | ||||
| -n, --n <n> Number of tweets to return [default: 18000] | |||||
| --include_rts Logical indicating whether retweets should be included | |||||
| --max_id <max_id> Return results with an ID less than (older than) or equal to max_id | |||||
| search: | |||||
| --type <type> Type of search results: "recent", "mixed", or "popular". [default: recent] | |||||
| --geocode <geocode> Geographical limiter of the template "latitude,longitude,radius" | |||||
| --since_id <since_id> Return results with an ID greather than (newer than) or equal to since_id, | |||||
| automatically extracted from the existing tweets <file>, if it exists, and | |||||
| ignored when <max_id> is set. Use "none" for all available tweets, | |||||
| or "last" for the maximum seen status_id in existing tweets. [default: last] | |||||
| -n, --n <n> Number of tweets to return [default: 18000] | |||||
| --include_rts Logical indicating whether retweets should be included | |||||
| (default is to exclude RTs) | |||||
| --max_id <max_id> Return tweets with an ID less (older) than or equal to | |||||
| timeline: | timeline: | ||||
| --home If included, returns home-timeline instead of user-timeline. | |||||
| <users> A list of users as user names, IDs, or a mixture of both, | |||||
| separated by spaces. | |||||
| --home If included, returns home-timeline instead of user-timeline. | |||||
| simplify: | simplify: | ||||
| --output <output> Output file, default is input file with `_simplified` appended to name. | |||||
| <fields> Tweet fields that should be included. By default includes: | |||||
| `status_id`, `created_at`, `user_id`, `screen_name`, `text`, | |||||
| `favorite_count`, `retweet_count`, `is_quote`, `hashtags`, | |||||
| `mentions_screen_name`, `profile_url`, `profile_image_url`, | |||||
| `media_url`, `urls_url`, `urls_expanded_url`. | |||||
| --output <output> Output file, default is input file with `_simplified` | |||||
| appended to name. | |||||
| ' -> doc | ' -> doc | ||||
| library(docopt) | library(docopt) | ||||
| exit() | exit() | ||||
| } | } | ||||
| library(gathertweet) | |||||
| collapse <- function(..., sep = ", ") paste(..., collapse = sep) | |||||
| do_gathertweet <- function() { | |||||
| library(gathertweet) | |||||
| collapse <- function(..., sep = ", ") paste(..., collapse = sep) | |||||
| # Which action was called? | |||||
| valid_actions <- c("search", "update", "simplify", "timeline") | |||||
| action <- names(Filter(isTRUE, args[valid_actions])) | |||||
| if (!length(action)) { | |||||
| log_fatal("Please specify a valid action: {collapse(valid_actions)}") | |||||
| } | |||||
| # Which action was called? | |||||
| valid_actions <- c("search", "update", "simplify", "timeline") | |||||
| action <- names(Filter(isTRUE, args[valid_actions])) | |||||
| if (!length(action)) { | |||||
| log_fatal("Please specify a valid action: {collapse(valid_actions)}") | |||||
| } | |||||
| if (args$polite) { | |||||
| lockfile <- paste0(".gathertweet_", | |||||
| digest::digest(args[c("file", "search", "update", "simplify")]), | |||||
| ".lock") | |||||
| lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0) | |||||
| gathertweet:::stopifnot_locked(lck, "Another gathertweet {action} process is currently running for {args$file}") | |||||
| } | |||||
| if (args$polite) { | |||||
| lockfile <- paste0( | |||||
| ".gathertweet_", | |||||
| digest::digest(args[c("file", "search", "update", "simplify")]), | |||||
| ".lock" | |||||
| ) | |||||
| lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0) | |||||
| gathertweet:::stopifnot_locked( | |||||
| lck, | |||||
| "Another gathertweet {action} process is currently running for {args$file}" | |||||
| ) | |||||
| on.exit({ | |||||
| filelock::unlock(lck) | |||||
| unlink(lockfile) | |||||
| }) | |||||
| } | |||||
| log_info("---- gathertweet {action} start ----") | |||||
| log_info("---- gathertweet {action} start ----") | |||||
| if (isTRUE(args$backup)) { | |||||
| backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||||
| } | |||||
| # Search ------------------------------------------------------------------ | |||||
| if (isTRUE(args$search)) { | |||||
| # Also simplify if --and-simplify flag is called | |||||
| if (args[["--and-simplify"]]) args$simplify <- TRUE | if (args[["--and-simplify"]]) args$simplify <- TRUE | ||||
| log_info("Searching for \"{paste0(args$terms, collapse = '\", \"')}\"") | |||||
| tweets <- | |||||
| # Search ---- | |||||
| if (isTRUE(args$search)) { | |||||
| max_id <- args[["max_id"]] | |||||
| since_id <- args[["since_id"]] | |||||
| since_id <- if (is.null(max_id)) { | |||||
| if (since_id == "last") { | |||||
| last_seen_tweet(file = args$file) | |||||
| } else if (since_id == "none") { | |||||
| NULL | |||||
| } else since_id | |||||
| } | |||||
| if (!is.null(since_id)) log_info("Tweets from {since_id}") | |||||
| if (!is.null(max_id)) log_info("Tweets up to {max_id}") | |||||
| tweets <- lapply( | |||||
| args$term, | |||||
| function(term) rtweet::search_tweets( | |||||
| q = term, | |||||
| n = as.integer(args$n), | |||||
| type = args$type, | |||||
| include_rts = args$include_rts, | |||||
| geocode = args$geocode, | |||||
| max_id = max_id, | |||||
| parse = !args[["no-parse"]], | |||||
| token = args$token, | |||||
| retryonratelimit = args$retryonratelimit, | |||||
| verbose = !args$quiet, | |||||
| since_id = since_id | |||||
| ) | |||||
| ) | |||||
| do.call("gathertweet_search", args) | |||||
| tweets <- dplyr::bind_rows(tweets) | |||||
| # Update ---- | |||||
| } else if (isTRUE(args$update)) { | |||||
| if (nrow(tweets) == 0) { | |||||
| log_info("No new tweets.") | |||||
| exit() | |||||
| } | |||||
| do.call("gathertweet_update", args) | |||||
| tweets <- tweets[!duplicated(tweets$status_id), ] | |||||
| tweets <- tweets[order(tweets$status_id), ] | |||||
| log_info("Gathered {nrow(tweets)} tweets") | |||||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||||
| tweets <- save_tweets(tweets, args$file) | |||||
| log_info("Total of {nrow(tweets)} tweets in {args$file}") | |||||
| # Update ------------------------------------------------------------------ | |||||
| } else if (isTRUE(args$update)) { | |||||
| logger("Updating tweets in {args$file}") | |||||
| tweets <- update_tweets( | |||||
| file = args$file, | |||||
| # passed to rtweet::lookup_statuses() | |||||
| parse = !args[["no-parse"]], | |||||
| token = args$token | |||||
| ) | |||||
| log_debug("Status lookup returned {nrow(tweets)} tweets") | |||||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||||
| tweets <- save_tweets(tweets, args$file) | |||||
| log_debug("Total of {nrow(tweets)} tweets in {args$file}") | |||||
| } else if (isTRUE(args$timeline)) { | |||||
| if (!length(args$users)) { | |||||
| stop("Please provide a list of users as user names, user IDs, or a mixture of both.") | |||||
| } | |||||
| # Timeline ---- | |||||
| } else if (isTRUE(args$timeline)) { | |||||
| if (!length(args$users)) { | |||||
| stop("Please provide a list of users as user names, user IDs, ", | |||||
| "or a mixture of both.") | |||||
| } | |||||
| log_info("Gathering tweets by {collapse(args$users)}") | |||||
| if (args[["--and-simplify"]]) args$simplify <- TRUE | |||||
| do.call("gathertweet_timeline", args) | |||||
| } | |||||
| tweets <- rtweet::get_timeline( | |||||
| user = args[["users"]], | |||||
| n = min(as.integer(args[["n"]]), 3200), | |||||
| max_id = args[["max_id"]], | |||||
| home = isTRUE(args[["home"]]), | |||||
| parse = isFALSE(args[["no-parse"]]), | |||||
| check = TRUE, | |||||
| token = args$token, | |||||
| include_rts = isTRUE(args[["include-rts"]]) | |||||
| ) | |||||
| tweets <- tweets[!duplicated(tweets$status_id), ] | |||||
| tweets <- tweets[order(tweets$status_id), ] | |||||
| log_info("Gathered {nrow(tweets)} tweets from {length(args$users)} users") | |||||
| if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]]) | |||||
| tweets <- save_tweets(tweets, args$file) | |||||
| log_info("Total of {nrow(tweets)} tweets in {args$file}") | |||||
| } | |||||
| # Simplify ---------------------------------------------------------------- | |||||
| if (isTRUE(args$simplify)) { | |||||
| do.call("gathertweet_simplify", args) | |||||
| } | |||||
| # Simplify ---------------------------------------------------------------- | |||||
| if (isTRUE(args$simplify)) { | |||||
| logger("Simplifying tweets in {args$file}") | |||||
| tweets_simplified <- simplify_tweets( | |||||
| tweets = NULL, | |||||
| file = args$file, | |||||
| .fields = args$fields | |||||
| ) | |||||
| log_debug("Simplified {nrow(tweets_simplified)} tweets") | |||||
| if (is.null(args$output)) { | |||||
| args$output <- gathertweet:::path_add(args$file, append = "_simplified") | |||||
| if (args$polite) { | |||||
| filelock::unlock(lck) | |||||
| unlink(lockfile) | |||||
| } | } | ||||
| log_info("Saving simplified tweets to {args$output}") | |||||
| tweets_simplfied <- save_tweets(tweets_simplified, args$output) | |||||
| } | |||||
| if (args$polite) { | |||||
| filelock::unlock(lck) | |||||
| unlink(lockfile) | |||||
| log_info("---- gathertweet {action} complete ----") | |||||
| } | } | ||||
| log_info("---- gathertweet {action} complete ----") | |||||
| do_gathertweet() |
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/gathertweet_actions.R | |||||
| \name{gathertweet_search} | |||||
| \alias{gathertweet_search} | |||||
| \title{gathertweet actions} | |||||
| \usage{ | |||||
| gathertweet_search(terms, file = "tweets.rds", n = 18000, | |||||
| max_id = NULL, since_id = "last", type = "recent", | |||||
| include_rts = FALSE, geocode = NULL, `no-parse` = FALSE, | |||||
| token = NULL, retryonratelimit = FALSE, quiet = FALSE, ...) | |||||
| } | |||||
| \description{ | |||||
| gathertweet actions | |||||
| } |