You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

153 lines
3.7KB

  #' @title gathertweet actions
  #'
  #' @description
  #' `gathertweet_search()` runs one [rtweet::search_tweets()] query per element
  #' of `terms` and stores the results in `file`.
  #'
  #' @param terms Character vector of search queries. Each element is searched
  #'   separately.
  #' @param file Path of the file the tweets are saved to. Also passed to
  #'   `last_seen_tweet()` when `since_id = "last"`.
  #' @param n Number of tweets requested per search term. Coerced with
  #'   `as.integer()` before being passed to [rtweet::search_tweets()].
  #' @param max_id Passed to [rtweet::search_tweets()]. When not `NULL`,
  #'   `since_id` is ignored.
  #' @param since_id Lower bound for the search. `"last"` uses the value
  #'   returned by `last_seen_tweet(file = file)`; `"none"` or `NULL` applies no
  #'   lower bound; any other value is passed to [rtweet::search_tweets()]
  #'   unchanged.
  #' @param type,include_rts,geocode,token,retryonratelimit Passed to
  #'   [rtweet::search_tweets()] unchanged.
  #' @param no-parse When `FALSE`, rtweet is asked to parse the results and they
  #'   are de-duplicated, sorted by `status_id` and handed to `save_tweets()`.
  #'   Otherwise the un-parsed results are written to `file` with `saveRDS()`.
  #' @param quiet When `FALSE`, [rtweet::search_tweets()] is called with
  #'   `verbose = TRUE`.
  #' @param ... Currently unused by this function.
  #'
  #' @return For parsed results, the value returned by `save_tweets()`. For
  #'   un-parsed results, a list with one element per search term.
  #' @export
  gathertweet_search <- function(
    terms,
    file = "tweets.rds",
    n = 18000,
    max_id = NULL,
    since_id = "last",
    type = "recent",
    include_rts = FALSE,
    geocode = NULL,
    `no-parse` = FALSE,
    token = NULL,
    retryonratelimit = FALSE,
    quiet = FALSE,
    ...
  ) {
    log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"")

    # An explicit `max_id` takes precedence over any `since_id`. `identical()`
    # is used instead of `==` so that `since_id = NULL` (or a value that is not
    # a single string) no longer makes the `if` condition fail.
    since_id <- if (!is.null(max_id) || is.null(since_id)) {
      NULL
    } else if (identical(since_id, "last")) {
      last_seen_tweet(file = file)
    } else if (identical(since_id, "none")) {
      NULL
    } else {
      since_id
    }

    if (!is.null(since_id)) log_info("Tweets from {since_id}")
    if (!is.null(max_id)) log_info("Tweets up to {max_id}")

    # Loop-invariant values are computed once rather than once per term.
    n <- as.integer(n)
    parse_tweets <- isFALSE(`no-parse`)
    verbose <- isFALSE(quiet)

    tweets <- lapply(
      terms,
      function(term) {
        rtweet::search_tweets(
          q = term,
          n = n,
          type = type,
          include_rts = include_rts,
          geocode = geocode,
          max_id = max_id,
          parse = parse_tweets,
          token = token,
          retryonratelimit = retryonratelimit,
          verbose = verbose,
          since_id = since_id
        )
      }
    )

    # Branch on the same flag that was sent to rtweet, so the save path always
    # matches the shape of the data that was actually requested.
    if (!parse_tweets) {
      log_info("Saving un-parsed tweets in {file}")
      saveRDS(tweets, file)
    } else {
      tweets <- dplyr::bind_rows(tweets)
      if (nrow(tweets) == 0) {
        log_info("No new tweets.")
        # NOTE(review): `exit()` is defined elsewhere; presumably it ends the
        # run here -- confirm.
        exit()
      }
      tweets <- tweets[!duplicated(tweets$status_id), ]
      tweets <- tweets[order(tweets$status_id), ]
      log_info("Gathered {nrow(tweets)} tweets")
      # NOTE(review): `save_tweets()` presumably returns everything now stored
      # in `file`, which is what the next log line reports -- confirm.
      tweets <- save_tweets(tweets, file)
      log_info("Total of {nrow(tweets)} tweets in {file}")
    }

    tweets
  }
  #' Update the tweets stored in a file
  #'
  #' Calls `update_tweets()` on `file` and passes the result to `save_tweets()`.
  #'
  #' @param file Path of the tweet file to update. `log_fatal()` is called when
  #'   it does not exist.
  #' @param no-parse When `FALSE`, `update_tweets()` is called with
  #'   `parse = TRUE`; otherwise with `parse = FALSE`.
  #' @param token Passed to `update_tweets()` unchanged.
  #' @param ... Currently unused by this function.
  #'
  #' @return The value returned by `save_tweets()`.
  #' @export
  gathertweet_update <- function(
    file = "tweets.rds",
    `no-parse` = FALSE,
    token = NULL,
    ...
  ) {
    logger("Updating tweets in {file}")

    file_is_missing <- !file.exists(file)
    if (file_is_missing) {
      log_fatal("`{file}` does not exist")
    }

    # `parse` and `token` are passed to rtweet::lookup_statuses()
    parse_tweets <- isFALSE(`no-parse`)
    tweets <- update_tweets(file = file, parse = parse_tweets, token = token)
    log_debug("Status lookup returned {nrow(tweets)} tweets")

    # The local must stay named `tweets`: the log templates refer to it.
    tweets <- save_tweets(tweets, file)
    log_debug("Total of {nrow(tweets)} tweets in {file}")

    tweets
  }
  #' Gather tweets from user timelines
  #'
  #' Requests timelines for `users` with [rtweet::get_timeline()] and stores
  #' the results in `file`.
  #'
  #' @param users Users whose timelines are requested; passed to
  #'   [rtweet::get_timeline()] as `user`.
  #' @param file Path of the file the tweets are saved to.
  #' @param n Number of tweets requested, coerced with `as.integer()`. A warning
  #'   is logged when it exceeds 3200.
  #' @param max_id Passed to [rtweet::get_timeline()] unchanged.
  #' @param home Passed to [rtweet::get_timeline()] as `isTRUE(home)`.
  #' @param no-parse When `FALSE`, rtweet is asked to parse the results and they
  #'   are de-duplicated, sorted by `status_id` and handed to `save_tweets()`.
  #'   Otherwise the un-parsed results are written to `file` with `saveRDS()`.
  #' @param token Passed to [rtweet::get_timeline()] unchanged.
  #' @param include_rts Passed to [rtweet::get_timeline()] as
  #'   `isTRUE(include_rts)`.
  #' @param ... Currently unused by this function.
  #'
  #' @return For parsed results, the value returned by `save_tweets()`. For
  #'   un-parsed results, the object returned by [rtweet::get_timeline()].
  #' @export
  gathertweet_timeline <- function(
    users,
    file = "tweets.rds",
    n = 3200,
    max_id = NULL,
    home = TRUE,
    `no-parse` = FALSE,
    token = NULL,
    include_rts = FALSE,
    ...
  ) {
    log_info("Gathering tweets by {collapse(users)}")

    n <- as.integer(n)
    if (n > 3200) {
      log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user")
    }

    parse_tweets <- isFALSE(`no-parse`)

    tweets <- rtweet::get_timeline(
      user = users,
      n = n,
      max_id = max_id,
      home = isTRUE(home),
      parse = parse_tweets,
      check = TRUE,
      token = token,
      include_rts = isTRUE(include_rts)
    )

    # Un-parsed results are not a data frame, so the `status_id` based
    # de-duplication and ordering below cannot be applied to them. Save them
    # as-is, mirroring what gathertweet_search() does for un-parsed results.
    if (!parse_tweets) {
      log_info("Saving un-parsed tweets in {file}")
      saveRDS(tweets, file)
      return(tweets)
    }

    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]
    log_info("Gathered {nrow(tweets)} tweets from {length(users)} users")

    # NOTE(review): `save_tweets()` presumably returns everything now stored in
    # `file`, which is what the next log line reports -- confirm.
    tweets <- save_tweets(tweets, file)
    log_info("Total of {nrow(tweets)} tweets in {file}")

    tweets
  }
  #' Simplify the tweets stored in a file
  #'
  #' Calls `simplify_tweets()` on `file` and saves the result with
  #' `save_tweets()`.
  #'
  #' @param file Path of the tweet file to simplify. `log_fatal()` is called
  #'   when it does not exist.
  #' @param fields Passed to `simplify_tweets()` as `.fields`.
  #' @param output Path the simplified tweets are saved to. When `NULL`, the
  #'   path is `path_add(file, append = "_simplified")`.
  #' @param ... Currently unused by this function.
  #'
  #' @return The value returned by `save_tweets()`.
  #' @export
  gathertweet_simplify <- function(
    file = "tweets.rds",
    fields = NULL,
    output = NULL,
    ...
  ) {
    logger("Simplifying tweets in {file}")

    file_is_missing <- !file.exists(file)
    if (file_is_missing) {
      log_fatal("`{file}` does not exist")
    }

    # `tweets = NULL` is passed explicitly; the input is identified by `file`.
    tweets_simplified <- simplify_tweets(tweets = NULL, file = file, .fields = fields)
    log_debug("Simplified {nrow(tweets_simplified)} tweets")

    # Derive the output path from the input path when none was given.
    output <- if (is.null(output)) {
      gathertweet:::path_add(file, append = "_simplified")
    } else {
      output
    }

    log_info("Saving simplified tweets to {output}")
    save_tweets(tweets_simplified, output)
  }