Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

208 lines
5.1KB

  # Search Twitter for each entry of `terms` and store the results in `file`.
  #
  # One rtweet::search_tweets() request is issued per term. When `no-parse` is
  # TRUE the raw per-term results are written to `file` with saveRDS() and
  # returned as-is. Otherwise the results are row-bound, de-duplicated on
  # `status_id`, ordered by `status_id` and passed to save_tweets(), whose
  # return value is what this function returns.
  #
  # `since_id` is resolved through set_since_id() before searching. Arguments
  # in `...` are accepted but not used here.
  #' @title gathertweet actions
  #' @export
  gathertweet_search <- function(
    terms,
    file = "tweets.rds",
    n = 18000,
    max_id = NULL,
    since_id = "last",
    type = "recent",
    include_rts = FALSE,
    geocode = NULL,
    `no-parse` = FALSE,
    token = NULL,
    retryonratelimit = FALSE,
    quiet = FALSE,
    ...
  ) {
    log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"")
    since_id <- set_since_id(since_id, max_id, file)

    # Runs the search for a single term using the shared settings.
    search_one_term <- function(term) {
      rtweet::search_tweets(
        q = term,
        n = as.integer(n),
        type = type,
        include_rts = include_rts,
        geocode = geocode,
        max_id = max_id,
        parse = isFALSE(`no-parse`),
        token = token,
        retryonratelimit = retryonratelimit,
        verbose = isFALSE(quiet),
        since_id = since_id
      )
    }
    tweets <- lapply(terms, search_one_term)

    # Un-parsed results are stored exactly as returned, one element per term.
    if (isTRUE(`no-parse`)) {
      log_info("Saving un-parsed tweets in {file}")
      saveRDS(tweets, file)
      return(tweets)
    }

    tweets <- dplyr::bind_rows(tweets)
    if (nrow(tweets) < 1L) {
      log_info("No new tweets.")
      # exit() is defined elsewhere in the package; presumably it ends the run.
      exit()
    }

    # Keep the first occurrence of each status, then sort by status id.
    is_first_seen <- !duplicated(tweets$status_id)
    tweets <- tweets[is_first_seen, ]
    id_order <- order(tweets$status_id)
    tweets <- tweets[id_order, ]
    log_info("Gathered {nrow(tweets)} tweets")

    tweets <- save_tweets(tweets, file)
    log_info("Total of {nrow(tweets)} tweets in {file}")
    tweets
  }
  # Refresh the tweets stored in `file`.
  #
  # Calls update_tweets() on `file` (its `parse` and `token` arguments are
  # forwarded to rtweet::lookup_statuses()), then hands the result to
  # save_tweets() and returns what save_tweets() returns. A missing `file` is
  # reported through log_fatal(). Arguments in `...` are accepted but unused.
  #' @export
  gathertweet_update <- function(
    file = "tweets.rds",
    `no-parse` = FALSE,
    token = NULL,
    ...
  ) {
    logger("Updating tweets in {file}")

    file_is_missing <- !file.exists(file)
    if (file_is_missing) log_fatal("`{file}` does not exist")

    # `parse` and `token` are passed to rtweet::lookup_statuses()
    should_parse <- isFALSE(`no-parse`)
    tweets <- update_tweets(file = file, parse = should_parse, token = token)
    log_debug("Status lookup returned {nrow(tweets)} tweets")

    tweets <- save_tweets(tweets, file)
    log_debug("Total of {nrow(tweets)} tweets in {file}")
    tweets
  }
  # Gather timeline tweets for `users` and store them in `file`.
  #
  # `n` is coerced to integer and capped at 3200, the per-user maximum stated
  # in the warning below. The request is made with rtweet::get_timeline().
  #
  # When `no-parse` is TRUE the raw result is written to `file` with saveRDS()
  # and returned unchanged, mirroring gathertweet_search(). Un-parsed results
  # are not a data frame, so the nrow()/status_id steps below cannot be
  # applied to them.
  #
  # Otherwise the tweets are de-duplicated on `status_id`, ordered by
  # `status_id`, passed to save_tweets(), and the value returned by
  # save_tweets() is returned. Arguments in `...` are accepted but unused.
  #' @export
  gathertweet_timeline <- function(
    users,
    file = "tweets.rds",
    n = 3200,
    max_id = NULL,
    home = TRUE,
    `no-parse` = FALSE,
    token = NULL,
    include_rts = FALSE,
    ...
  ) {
    log_info("Gathering tweets by {collapse(users)}")
    n <- as.integer(n)
    if (n > 3200) {
      log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user")
      # Cap the request, consistent with gathertweet_favorites().
      n <- 3200L
    }
    tweets <- rtweet::get_timeline(
      user = users,
      n = n,
      max_id = max_id,
      home = isTRUE(home),
      parse = isFALSE(`no-parse`),
      check = TRUE,
      token = token,
      include_rts = isTRUE(include_rts)
    )
    if (isTRUE(`no-parse`)) {
      # nrow() of a non-data-frame is NULL, which would make the `if` below
      # fail with "argument is of length zero"; store the raw result instead.
      log_info("Saving un-parsed tweets in {file}")
      saveRDS(tweets, file)
      return(tweets)
    }
    if (!nrow(tweets)) {
      log_fatal("No new tweets.")
    }
    # Keep the first occurrence of each status, then sort by status id.
    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]
    log_info("Gathered {nrow(tweets)} tweets from {length(users)} users")
    tweets <- save_tweets(tweets, file)
    log_info("Total of {nrow(tweets)} tweets in {file}")
    tweets
  }
  # Gather tweets favorited by `users` and store them in `file`.
  #
  # `since_id` is resolved through set_since_id(). A NULL `since_id` (the
  # default) is passed on as "none", which set_since_id() maps to NULL; this
  # avoids comparing NULL against a string inside set_since_id().
  #
  # `n` is coerced to integer and capped at 3000, the per-user maximum stated
  # in the warning below. The request is made with rtweet::get_favorites().
  #
  # When `no-parse` is TRUE the raw result is written to `file` with saveRDS()
  # and returned unchanged, mirroring gathertweet_search(). Un-parsed results
  # are not a data frame, so the nrow()/status_id steps below cannot be
  # applied to them.
  #
  # Otherwise the tweets are de-duplicated on `status_id`, ordered by
  # `status_id`, passed to save_tweets(), and the value returned by
  # save_tweets() is returned. Arguments in `...` are accepted but unused.
  #' @export
  gathertweet_favorites <- function(
    users,
    file = "tweets.rds",
    n = 3000,
    max_id = NULL,
    since_id = NULL,
    `no-parse` = FALSE,
    token = NULL,
    ...
  ) {
    log_info("Gathering tweets favorited by {collapse(users)}")
    # "none" and NULL both mean "no lower bound"; "none" is safe to compare.
    since_id <- set_since_id(
      if (is.null(since_id)) "none" else since_id,
      max_id,
      file
    )
    n <- as.integer(n)
    if (n > 3000) {
      log_warn("Twitter API for favorites/list returns a maximum of 3000 tweets per user")
      n <- 3000L
    }
    tweets <- rtweet::get_favorites(
      user = users,
      n = n,
      max_id = max_id,
      since_id = since_id,
      parse = isFALSE(`no-parse`),
      token = token
    )
    if (isTRUE(`no-parse`)) {
      # nrow() of a non-data-frame is NULL, which would make the `if` below
      # fail with "argument is of length zero"; store the raw result instead.
      log_info("Saving un-parsed tweets in {file}")
      saveRDS(tweets, file)
      return(tweets)
    }
    if (!nrow(tweets)) {
      log_fatal("No new tweets.")
    }
    # Keep the first occurrence of each status, then sort by status id.
    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]
    log_info("Gathered {nrow(tweets)} tweets from {length(users)} users")
    tweets <- save_tweets(tweets, file)
    log_info("Total of {nrow(tweets)} tweets in {file}")
    tweets
  }
  # Write a simplified copy of the tweets stored in `file`.
  #
  # simplify_tweets() is called with `tweets = NULL`, the input `file` and the
  # requested `fields`. When `output` is NULL the destination is built with
  # path_add(file, append = "_simplified"). The result of save_tweets() on the
  # simplified tweets is returned. A missing `file` is reported through
  # log_fatal(). Arguments in `...` are accepted but unused.
  #' @export
  gathertweet_simplify <- function(file = "tweets.rds", fields = NULL, output = NULL, ...) {
    logger("Simplifying tweets in {file}")

    file_is_missing <- !file.exists(file)
    if (file_is_missing) log_fatal("`{file}` does not exist")

    tweets_simplified <- simplify_tweets(tweets = NULL, file = file, .fields = fields)
    log_debug("Simplified {nrow(tweets_simplified)} tweets")

    # Derive the destination from the input path when none was given.
    if (is.null(output)) output <- gathertweet:::path_add(file, append = "_simplified")

    log_info("Saving simplified tweets to {output}")
    save_tweets(tweets_simplified, output)
  }
  # TRUE only when `x` is a single, non-missing logical FALSE.
  isFALSE <- function(x) {
    # Anything that is not a length-one logical can never qualify.
    if (!is.logical(x) || length(x) != 1L) {
      return(FALSE)
    }
    !is.na(x) && !x
  }
  # Resolve the `since_id` to use for a request.
  #
  # - If `max_id` is given, `since_id` is dropped (NULL is returned).
  # - "last" is replaced by last_seen_tweet(file = file); `file` must be
  #   provided in that case, otherwise log_fatal() is called.
  # - "none" is replaced by NULL.
  # - Any other value, including NULL, is returned unchanged.
  #
  # The keyword comparisons are wrapped in isTRUE() so that a NULL, NA or
  # zero-length `since_id` falls through to "returned unchanged" instead of
  # making `if` fail with "argument is of length zero".
  #
  # An info message is logged for whichever of `since_id` / `max_id` is in
  # effect. Returns the resolved `since_id` (possibly NULL).
  set_since_id <- function(since_id = NULL, max_id = NULL, file = NULL) {
    since_id <- if (!is.null(max_id)) {
      # An explicit upper bound takes the place of any lower bound.
      NULL
    } else if (isTRUE(since_id == "last")) {
      if (is.null(file)) {
        log_fatal("`file` must be provided for since_id = \"last\"")
      }
      last_seen_tweet(file = file)
    } else if (isTRUE(since_id == "none")) {
      NULL
    } else {
      since_id
    }
    if (!is.null(since_id)) log_info("Tweets from {since_id}")
    if (!is.null(max_id)) log_info("Tweets up to {max_id}")
    since_id
  }