Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

149 lines
5.3KB

  1. #! /usr/bin/env Rscript
  2. # Usage -----------------------------------------------------------------------
  # The single-quoted string below is the usage/help text. It is assigned to
  # `doc` by the right-assignment on its closing line and is later passed to
  # docopt() to parse the command line.
  # NOTE(review): the leading "N." counters and the single-space gaps between
  # options and their descriptions look like extraction artifacts; docopt
  # presumably needs the original layout restored -- confirm against upstream.
  # NOTE(review): the --since_id help text spells "greater" as "greather", and
  # the --max_id description appears to stop mid-sentence.
  3. 'Gather tweets from the command line
  4. Usage:
  5. gathertweet search [--file=<file>] [options] [--] <terms>...
  6. gathertweet timeline [options] [--] <users>...
  7. gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>]
  8. gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...]
  9. Options:
  10. -h --help Show this screen.
  11. --file <file> Name of RDS file where tweets are stored
  12. [default: tweets.rds]
  13. --no-parse Disable parsing of the results
  14. --token <token> See {rtweet} for more information
  15. --retryonratelimit Wait and retry when rate limited (only relevant when n
  16. exceeds 18000 tweets)
  17. --quiet Disable printing of {rtweet} processing messages
  18. --polite Only allow one process (search|update) to run at a time
  19. --backup Create a backup of existing tweet file
  20. --backup-dir <dir> Location for backups [default: backups]
  21. --debug-args Debug input arguments
  22. --and-simplify Create additional simplified tweet set.
  23. Run `gathertweet simplify` manually for more control.
  24. search:
  25. <terms> Search terms. Individual search terms are queried separately,
  26. but duplicated tweets are removed from the stored results.
  27. Each search term counts against the 15 minute rate limit of 180
  28. searches, which can be avoided by manually joining search terms
  29. into a single query. NOTE: Wrap queries with spaces in
  30. \'single quotes\': only use double quotes within single quotes.
  31. --type <type> Type of search results: "recent", "mixed", or "popular"
  32. [default: recent]
  33. --geocode <geocode> Geographical limiter of the template
  34. "latitude,longitude,radius"
  35. --since_id <since_id> Return results with an ID greather than (newer than) or
  36. equal to since_id, automatically extracted from the
  37. existing tweets <file>, if it exists, and ignored when
  38. <max_id> is set. Use "none" for all available tweets,
  39. or "last" for the maximum seen status_id in existing
  40. tweets. [default: last]
  41. search and timeline:
  42. -n, --n <n> Number of tweets to return [default: 18000]
  43. --include_rts Logical indicating whether retweets should be included
  44. (default is to exclude RTs)
  45. --max_id <max_id> Return tweets with an ID less (older) than or equal to
  46. timeline:
  47. <users> A list of users as user names, IDs, or a mixture of both,
  48. separated by spaces.
  49. --home If included, returns home-timeline instead of user-timeline.
  50. simplify:
  51. <fields> Tweet fields that should be included. By default includes:
  52. `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
  53. `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
  54. `mentions_screen_name`, `profile_url`, `profile_image_url`,
  55. `media_url`, `urls_url`, `urls_expanded_url`.
  56. --output <output> Output file, default is input file with `_simplified`
  57. appended to name.
  58. ' -> doc
  # Parse the command line against the usage text held in `doc`. The version
  # string reported by docopt is built from the installed gathertweet package.
  library(docopt)
  args <- docopt(
    doc,
    version = paste("gathertweet version", packageVersion("gathertweet"))
  )

  # Terminate the R session without saving the workspace, using `value` as the
  # process exit status.
  exit <- function(value = 0) {
    quit(save = "no", status = value)
  }

  # With --debug-args: print the parsed arguments, write them to args.rds in
  # the working directory, and exit before any action is dispatched.
  if (args$`--debug-args`) {
    str(args)
    saveRDS(args, file = "args.rds")
    exit()
  }
  # Dispatch the gathertweet sub-command selected on the command line.
  #
  # Reads the parsed docopt result from the script-level `args` list and:
  #   1. works out which action (search, update, simplify, timeline) was set,
  #   2. with --polite, takes an exclusive file lock for the duration of the run,
  #   3. with --backup, backs up the tweet file first,
  #   4. forwards `args` to the matching gathertweet_*() function, and
  #   5. runs the simplify step when requested directly or via --and-simplify.
  #
  # Takes no arguments and is called for its side effects.
  do_gathertweet <- function() {
    library(gathertweet)

    # Referenced from inside the log message template below.
    collapse <- function(..., sep = ", ") paste(..., collapse = sep)

    # Which action was called?
    valid_actions <- c("search", "update", "simplify", "timeline")
    action <- names(Filter(isTRUE, args[valid_actions]))
    if (length(action) == 0) {
      log_fatal("Please specify a valid action: {collapse(valid_actions)}")
    }

    if (isTRUE(args$polite)) {
      # The lock file name is derived from the target file and the action
      # flags hashed below.
      lockfile <- paste0(
        ".gathertweet_",
        digest::digest(args[c("file", "search", "update", "simplify")]),
        ".lock"
      )
      # timeout = 0: do not wait for a lock that is already held.
      lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0)
      # NOTE(review): presumably aborts when the lock was not acquired --
      # confirm against the gathertweet internals.
      gathertweet:::stopifnot_locked(
        lck,
        "Another gathertweet {action} process is currently running for {args$file}"
      )
      # Single cleanup path: the handler runs when this function exits, on
      # normal return and on error alike, so no manual release is needed at
      # the end of the function. `add = TRUE` keeps it from replacing any
      # other exit handler.
      on.exit(
        {
          filelock::unlock(lck)
          unlink(lockfile)
        },
        add = TRUE
      )
    }

    log_info("---- gathertweet {action} start ----")

    if (isTRUE(args$backup)) {
      backup_tweets(args$file, backup_dir = args[["backup-dir"]])
    }

    # Also simplify if --and-simplify flag is called. This modifies a local
    # copy of `args`, which is what the calls below receive.
    if (isTRUE(args[["--and-simplify"]])) {
      args$simplify <- TRUE
    }

    # The dispatched functions are called for their side effects; their return
    # value is not used here.
    if (isTRUE(args$search)) {
      # Search ----
      do.call("gathertweet_search", args)
    } else if (isTRUE(args$update)) {
      # Update ----
      do.call("gathertweet_update", args)
    } else if (isTRUE(args$timeline)) {
      # Timeline ----
      if (length(args$users) == 0) {
        stop(
          "Please provide a list of users as user names, user IDs, ",
          "or a mixture of both."
        )
      }
      do.call("gathertweet_timeline", args)
    }

    # Simplify ----------------------------------------------------------------
    if (isTRUE(args$simplify)) {
      do.call("gathertweet_simplify", args)
    }

    log_info("---- gathertweet {action} complete ----")
  }

  do_gathertweet()