пре 7 година · ac7b015934
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,10 @@

 export("%>%")
 export(backup_tweets)
 export(gathertweet_search)
 export(gathertweet_simplify)
 export(gathertweet_timeline)
 export(gathertweet_update)
 export(get_user_info)
 export(install_gathertweet)
 export(last_seen_tweet)
--- a/R/gathertweet_actions.R
+++ b/R/gathertweet_actions.R
@@ -0,0 +1,152 @@
 #' Search for tweets and store the results
 #'
 #' Runs one \code{rtweet::search_tweets()} query per element of \code{terms}.
 #' Parsed results are combined, de-duplicated by \code{status_id}, ordered by
 #' \code{status_id} and handed to \code{save_tweets()}. Un-parsed results are
 #' written to \code{file} with \code{saveRDS()} exactly as returned.
 #'
 #' @param terms Character vector of search queries; each element is sent as
 #'   its own query.
 #' @param file Path of the RDS file the tweets are stored in.
 #' @param n Number of tweets requested per query; coerced with
 #'   \code{as.integer()}.
 #' @param max_id Passed to \code{rtweet::search_tweets()}. When it is not
 #'   \code{NULL}, \code{since_id} is ignored.
 #' @param since_id \code{"last"} uses \code{last_seen_tweet(file = file)},
 #'   \code{"none"} or \code{NULL} applies no lower bound, and any other value
 #'   is passed through unchanged.
 #' @param type,include_rts,geocode,token,retryonratelimit Passed to
 #'   \code{rtweet::search_tweets()}.
 #' @param no-parse If \code{TRUE}, results are requested un-parsed and saved
 #'   with \code{saveRDS()}, replacing the contents of \code{file}.
 #' @param quiet If \code{TRUE}, \code{verbose = FALSE} is passed to
 #'   \code{rtweet::search_tweets()}.
 #' @param ... Not used by this function; accepted so that a larger argument
 #'   list can be supplied through \code{do.call()}.
 #' @return The value returned by \code{save_tweets()} for parsed results, or
 #'   the list of raw results when \code{no-parse} is \code{TRUE}.
 #' @export
 gathertweet_search <- function(
  terms,
  file             = "tweets.rds",
  n                = 18000,
  max_id           = NULL,
  since_id         = "last",
  type             = "recent",
  include_rts      = FALSE,
  geocode          = NULL,
  `no-parse`       = FALSE,
  token            = NULL,
  retryonratelimit = FALSE,
  quiet            = FALSE,
  ...
 ) {
  log_info("Searching for \"{paste0(terms, collapse = '\", \"')}\"")

  # since_id only applies when no max_id is given. A NULL since_id is treated
  # like "none" so the string comparisons below never see a zero-length value.
  since_id <- if (is.null(max_id) && !is.null(since_id)) {
    if (since_id == "last") {
      last_seen_tweet(file = file)
    } else if (since_id == "none") {
      NULL
    } else {
      since_id
    }
  }
  if (!is.null(since_id)) log_info("Tweets from {since_id}")
  if (!is.null(max_id)) log_info("Tweets up to {max_id}")

  # One query per term; the result is a list with one element per term.
  tweets <- lapply(
    terms,
    function(term) rtweet::search_tweets(
      q                = term,
      n                = as.integer(n),
      type             = type,
      include_rts      = include_rts,
      geocode          = geocode,
      max_id           = max_id,
      parse            = isFALSE(`no-parse`),
      token            = token,
      retryonratelimit = retryonratelimit,
      verbose          = isFALSE(quiet),
      since_id         = since_id
    )
  )

  if (isTRUE(`no-parse`)) {
    # Raw results cannot be merged with stored tweets, so they are written
    # out as returned.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  } else {
    tweets <- dplyr::bind_rows(tweets)

    if (nrow(tweets) == 0) {
      log_info("No new tweets.")
      # NOTE(review): exit() is defined outside this block; presumably it
      # ends the run here -- confirm.
      exit()
    }

    # The same tweet can match several terms; keep the first occurrence.
    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]

    log_info("Gathered {nrow(tweets)} tweets")
    tweets <- save_tweets(tweets, file)

    log_info("Total of {nrow(tweets)} tweets in {file}")
  }

  tweets
 }

 #' Update the tweets stored in a file
 #'
 #' Calls \code{update_tweets()} for \code{file} and stores the result with
 #' \code{save_tweets()}.
 #'
 #' @param file Path of the RDS file holding the tweets; \code{log_fatal()} is
 #'   called when it does not exist.
 #' @param no-parse If \code{TRUE}, \code{parse = FALSE} is passed to
 #'   \code{update_tweets()}.
 #' @param token Passed to \code{update_tweets()}.
 #' @param ... Not used by this function; accepted so that a larger argument
 #'   list can be supplied through \code{do.call()}.
 #' @return The value returned by \code{save_tweets()}.
 #' @export
 gathertweet_update <- function(file = "tweets.rds", `no-parse` = FALSE, token = NULL, ...) {
  logger("Updating tweets in {file}")

  file_missing <- !file.exists(file)
  if (file_missing) log_fatal("`{file}` does not exist")

  # parse is passed on to rtweet::lookup_statuses()
  parse_results <- isFALSE(`no-parse`)
  tweets <- update_tweets(file = file, parse = parse_results, token = token)
  log_debug("Status lookup returned {nrow(tweets)} tweets")

  tweets <- save_tweets(tweets, file)
  log_debug("Total of {nrow(tweets)} tweets in {file}")

  tweets
 }

 #' Gather user timelines and store the results
 #'
 #' Requests timelines with \code{rtweet::get_timeline()}. Parsed results are
 #' de-duplicated by \code{status_id}, ordered by \code{status_id} and handed
 #' to \code{save_tweets()}. Un-parsed results are written to \code{file} with
 #' \code{saveRDS()} exactly as returned, matching \code{gathertweet_search()}.
 #'
 #' @param users Users whose timelines are requested; passed as \code{user} to
 #'   \code{rtweet::get_timeline()}.
 #' @param file Path of the RDS file the tweets are stored in.
 #' @param n Number of tweets requested; coerced with \code{as.integer()}. A
 #'   warning is logged when it exceeds 3200.
 #' @param max_id,token Passed to \code{rtweet::get_timeline()}.
 #' @param home Passed as \code{isTRUE(home)} to
 #'   \code{rtweet::get_timeline()}.
 #' @param no-parse If \code{TRUE}, results are requested un-parsed and saved
 #'   with \code{saveRDS()}, replacing the contents of \code{file}.
 #' @param include_rts Passed as \code{isTRUE(include_rts)} to
 #'   \code{rtweet::get_timeline()}.
 #' @param ... Not used by this function; accepted so that a larger argument
 #'   list can be supplied through \code{do.call()}.
 #' @return The value returned by \code{save_tweets()} for parsed results, or
 #'   the raw result when \code{no-parse} is \code{TRUE}.
 #' @export
 gathertweet_timeline <- function(
  users,
  file        = "tweets.rds",
  n           = 3200,
  max_id      = NULL,
  home        = TRUE,
  `no-parse`  = FALSE,
  token       = NULL,
  include_rts = FALSE,
  ...
 ) {
  # NOTE(review): home defaults to TRUE here, while the command-line help
  # describes --home as opt-in -- confirm the default is intended.
  log_info("Gathering tweets by {collapse(users)}")

  n <- as.integer(n)
  if (n > 3200) {
    log_warn("Twitter API for timelines returns a maximum of 3200 tweets per user")
  }

  tweets <- rtweet::get_timeline(
    user        = users,
    n           = n,
    max_id      = max_id,
    home        = isTRUE(home),
    parse       = isFALSE(`no-parse`),
    check       = TRUE,
    token       = token,
    include_rts = isTRUE(include_rts)
  )

  if (isTRUE(`no-parse`)) {
    # Un-parsed results are not a data frame with a status_id column, so the
    # de-duplication and merge steps below do not apply to them.
    log_info("Saving un-parsed tweets in {file}")
    saveRDS(tweets, file)
  } else {
    tweets <- tweets[!duplicated(tweets$status_id), ]
    tweets <- tweets[order(tweets$status_id), ]

    log_info("Gathered {nrow(tweets)} tweets from {length(users)} users")
    tweets <- save_tweets(tweets, file)

    log_info("Total of {nrow(tweets)} tweets in {file}")
  }

  tweets
 }

 #' Write a simplified copy of the stored tweets
 #'
 #' Calls \code{simplify_tweets()} on \code{file} and saves the result with
 #' \code{save_tweets()}.
 #'
 #' @param file Path of the RDS file holding the tweets; \code{log_fatal()} is
 #'   called when it does not exist.
 #' @param fields Passed as \code{.fields} to \code{simplify_tweets()}.
 #' @param output Path the simplified tweets are saved to. When \code{NULL},
 #'   it is derived from \code{file} with
 #'   \code{path_add(file, append = "_simplified")}.
 #' @param ... Not used by this function; accepted so that a larger argument
 #'   list can be supplied through \code{do.call()}.
 #' @return The value returned by \code{save_tweets()}.
 #' @export
 gathertweet_simplify <- function(
  file = "tweets.rds",
  fields = NULL,
  output = NULL,
  ...
 ) {
  logger("Simplifying tweets in {file}")
  if (!file.exists(file)) {
    log_fatal("`{file}` does not exist")
  }
  tweets_simplified <- simplify_tweets(
    tweets = NULL,
    file = file,
    .fields = fields
  )
  log_debug("Simplified {nrow(tweets_simplified)} tweets")
  if (is.null(output)) {
    # path_add() is internal to this package, so it is already in scope here
    # and does not need the `gathertweet:::` prefix.
    output <- path_add(file, append = "_simplified")
  }
  log_info("Saving simplified tweets to {output}")
  save_tweets(tweets_simplified, output)
 }
--- a/R/logging.R
+++ b/R/logging.R
@@ -5,6 +5,8 @@
  futile.logger::flog.layout(gathertweet_layout, name = "gathertweet")
 }

 # Forward `...` to paste() and collapse the result into one string, using
 # `sep` (", " by default) as the separator between elements.
 collapse <- function(..., sep = ", ") {
  paste(..., collapse = sep)
 }

 #' @title Logging functions
 #' @export
 logger <- function(..., level = "info", envir = parent.frame()) {
--- a/README.Rmd
+++ b/README.Rmd
@@ -7,7 +7,7 @@ output: github_document
 ```{r setup, include = FALSE}
 knitr::opts_chunk$set(
  collapse = TRUE,
  cache = TRUE,
  cache = FALSE,
  comment = "",
  prompt = TRUE,
  fig.path = "man/figures/README-",
--- a/README.md
+++ b/README.md
@@ -85,50 +85,49 @@ Get 100 \#rstats tweets

 ``` bash
 > gathertweet search --n 100 --quiet "#rstats"
 [2019-05-04 14:52:15] [INFO] ---- gathertweet search start ----
 [2019-05-04 14:52:15] [INFO] Searching for "#rstats"
 [2019-05-04 14:52:16] [INFO] Gathered 100 tweets
 [2019-05-04 14:52:16] [INFO] Total of 100 tweets in tweets.rds
 [2019-05-04 14:52:16] [INFO] ---- gathertweet search complete ----
 INFO [2019-05-06 21:56:27] ---- gathertweet search start ----
 INFO [2019-05-06 21:56:27] Searching for "#rstats"
 INFO [2019-05-06 21:56:28] Gathered 98 tweets
 INFO [2019-05-06 21:56:28] Total of 98 tweets in tweets.rds
 INFO [2019-05-06 21:56:28] ---- gathertweet search complete ----
 ```

 Get more tweets, automatically starting from end of the last search

 ``` bash
 > gathertweet search --n 100 --quiet "#rstats"
 [2019-05-04 14:53:17] [INFO] ---- gathertweet search start ----
 [2019-05-04 14:53:17] [INFO] Searching for "#rstats"
 [2019-05-04 14:53:17] [INFO] Tweets from 1124748486971359232
 [2019-05-04 14:53:17] [INFO] Gathered 1 tweets
 [2019-05-04 14:53:17] [INFO] Total of 100 tweets in tweets.rds
 [2019-05-04 14:53:17] [INFO] ---- gathertweet search complete ----
 INFO [2019-05-06 21:57:29] ---- gathertweet search start ----
 INFO [2019-05-06 21:57:29] Searching for "#rstats"
 INFO [2019-05-06 21:57:29] Tweets from 1125579895403352064
 INFO [2019-05-06 21:57:29] No new tweets.
 ```

 Update the stored data about those \#rstats tweets

 ``` bash
 > gathertweet update
 [2019-05-04 14:53:18] [INFO] ---- gathertweet update start ----
 [2019-05-04 14:53:18] [INFO] Updating tweets in tweets.rds
 [2019-05-04 14:53:18] [INFO] Getting 100 tweets
 [2019-05-04 14:53:19] [INFO] ---- gathertweet update complete ----
 INFO [2019-05-06 21:57:30] ---- gathertweet update start ----
 INFO [2019-05-06 21:57:30] Updating tweets in tweets.rds
 INFO [2019-05-06 21:57:30] Getting 98 tweets
 INFO [2019-05-06 21:57:31] ---- gathertweet update complete ----
 ```

 ``` bash
 > ls -lh
 total 40K
 -rw-rw-r-- 1 garrick garrick 39K May  4 14:53 tweets.rds
 -rw-rw-r-- 1 garrick garrick 39K May  6 21:57 tweets.rds
 ```

 Gather user timelines

 ``` bash
 > gathertweet timeline hadleywickham jennybryan dataandme
 [2019-05-04 21:11:54] [INFO] ---- gathertweet timeline start ----
 [2019-05-04 21:11:54] [INFO] Gathering tweets by hadleywickham, jennybryan, dataandme
 [2019-05-04 21:12:23] [INFO] Gathered 7368 tweets from 3 users
 [2019-05-04 21:12:23] [INFO] Total of 7368 tweets in tweets.rds
 [2019-05-04 21:12:23] [INFO] ---- gathertweet timeline complete ----
 INFO [2019-05-06 21:57:32] ---- gathertweet timeline start ----
 INFO [2019-05-06 21:57:32] Gathering tweets by hadleywickham, jennybryan, dataandme
 WARN [2019-05-06 21:57:32] Twitter API for timelines returns a maximum of 3200 tweets per user
 INFO [2019-05-06 21:58:01] Gathered 7427 tweets from 3 users
 INFO [2019-05-06 21:58:02] Total of 7524 tweets in tweets.rds
 INFO [2019-05-06 21:58:02] ---- gathertweet timeline complete ----
 ```

 ### Schedule tweet gathering using cron
@@ -157,52 +156,58 @@ crontab -e
    Usage:
      gathertweet search [--file=<file>] [options] [--] <terms>...
      gathertweet timeline [options] [--] <users>...
      gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args]
      gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>]
      gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...]
    
    Arguments
      <terms>  Search terms. Individual search terms are queried separately,
               but duplicated tweets are removed from the stored results.
               Each search term counts against the 15 minute rate limit of 180
               searches, which can be avoided by manually joining search terms
               into a single query. WARNING: Wrap queries with spaces in
               'single quotes': double quotes are allowed inside single quotes only.
    
      <fields>  Tweet fields that should be included. Default value will include
                `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
                `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
                `mentions_screen_name`, `profile_url`, `profile_image_url`,
                `media_url`, `urls_url`, `urls_expanded_url`.
    
    Options:
      -h --help             Show this screen.
      --file <file>         Name of RDS file where tweets are stored [default: tweets.rds]
      --file <file>         Name of RDS file where tweets are stored
                            [default: tweets.rds]
      --no-parse            Disable parsing of the results
      --token <token>       See {rtweet} for more information
      --retryonratelimit    Wait and retry when rate limited (only relevant when n exceeds 18000 tweets)
      --quiet               Disable printing of {rtweet} processing/retrieval messages
      --retryonratelimit    Wait and retry when rate limited (only relevant when n
                            exceeds 18000 tweets)
      --quiet               Disable printing of {rtweet} processing messages
      --polite              Only allow one process (search|update) to run at a time
      --backup              Create a backup of existing tweet file before writing any new files
      --backup-dir <dir>    Location for backups, use "" for current directory. [default: backups]
      --debug-args          Print values of the arguments only
      --and-simplify        Create additional simplified tweet set with default values.
      --backup              Create a backup of existing tweet file
      --backup-dir <dir>    Location for backups [default: backups]
      --debug-args          Debug input arguments
      --and-simplify        Create additional simplified tweet set.
                            Run `gathertweet simplify` manually for more control.
    search:
      <terms>  Search terms. Individual search terms are queried separately,
               but duplicated tweets are removed from the stored results.
               Each search term counts against the 15 minute rate limit of 180
               searches, which can be avoided by manually joining search terms
               into a single query. NOTE: Wrap queries with spaces in
               'single quotes': only use double quotes within single quotes.
      --type <type>         Type of search results: "recent", "mixed", or "popular"
                            [default: recent]
      --geocode <geocode>   Geographical limiter of the template
                            "latitude,longitude,radius"
      --since_id <since_id> Return results with an ID greater than (newer than) or
                            equal to since_id, automatically extracted from the
                            existing tweets <file>, if it exists, and ignored when
                            <max_id> is set. Use "none" for all available tweets,
                            or "last" for the maximum seen status_id in existing
                            tweets. [default: last]
    
    search and timeline:
      -n, --n <n>           Number of tweets to return [default: 18000]
      --include_rts         Logical indicating whether retweets should be included
      --max_id <max_id>     Return results with an ID less than (older than) or equal to max_id
    
    search:
      --type <type>         Type of search results: "recent", "mixed", or "popular". [default: recent]
      --geocode <geocode>   Geographical limiter of the template "latitude,longitude,radius"
      --since_id <since_id> Return results with an ID greater than (newer than) or equal to since_id,
                            automatically extracted from the existing tweets <file>, if it exists, and
                            ignored when <max_id> is set. Use "none" for all available tweets,
                            or "last" for the maximum seen status_id in existing tweets. [default: last]
      -n, --n <n>        Number of tweets to return [default: 18000]
      --include_rts      Logical indicating whether retweets should be included
                         (default is to exclude RTs)
      --max_id <max_id>  Return tweets with an ID less (older) than or equal to max_id
    
    timeline:
      --home                If included, returns home-timeline instead of user-timeline.
      <users>  A list of users as user names, IDs, or a mixture of both,
               separated by spaces.
      --home   If included, returns home-timeline instead of user-timeline.
    
    simplify:
      --output <output>     Output file, default is input file with `_simplified` appended to name.
      <fields>  Tweet fields that should be included. By default includes:
                `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
                `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
                `mentions_screen_name`, `profile_url`, `profile_image_url`,
                `media_url`, `urls_url`, `urls_expanded_url`.
      --output <output>  Output file, default is input file with `_simplified`
                         appended to name.
--- a/inst/gathertweet.R
+++ b/inst/gathertweet.R
@@ -1,60 +1,66 @@
 #! /usr/bin/env Rscript

 # Usage -------------------------------------------------------------------
 # Usage -----------------------------------------------------------------------
 'Gather tweets from the command line

 Usage:
  gathertweet search [--file=<file>] [options] [--] <terms>...
  gathertweet timeline [options] [--] <users>...
  gathertweet update [--file=<file> --token=<token> --backup --backup-dir=<dir> --polite --debug-args]
  gathertweet update [--file=<file> --and-simplify --polite --debug-args --token=<token> --backup --backup-dir=<dir>]
  gathertweet simplify [--file=<file> --output=<output> --debug-args --polite] [<fields>...]

 Arguments
  <terms>  Search terms. Individual search terms are queried separately,
           but duplicated tweets are removed from the stored results.
           Each search term counts against the 15 minute rate limit of 180
           searches, which can be avoided by manually joining search terms
           into a single query. WARNING: Wrap queries with spaces in
           \'single quotes\': double quotes are allowed inside single quotes only.

  <fields>  Tweet fields that should be included. Default value will include
            `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
            `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
            `mentions_screen_name`, `profile_url`, `profile_image_url`,
            `media_url`, `urls_url`, `urls_expanded_url`.

 Options:
  -h --help             Show this screen.
  --file <file>         Name of RDS file where tweets are stored [default: tweets.rds]
  --file <file>         Name of RDS file where tweets are stored
                        [default: tweets.rds]
  --no-parse            Disable parsing of the results
  --token <token>       See {rtweet} for more information
  --retryonratelimit    Wait and retry when rate limited (only relevant when n exceeds 18000 tweets)
  --quiet               Disable printing of {rtweet} processing/retrieval messages
  --retryonratelimit    Wait and retry when rate limited (only relevant when n
                        exceeds 18000 tweets)
  --quiet               Disable printing of {rtweet} processing messages
  --polite              Only allow one process (search|update) to run at a time
  --backup              Create a backup of existing tweet file before writing any new files
  --backup-dir <dir>    Location for backups, use "" for current directory. [default: backups]
  --debug-args          Print values of the arguments only
  --and-simplify        Create additional simplified tweet set with default values.
  --backup              Create a backup of existing tweet file
  --backup-dir <dir>    Location for backups [default: backups]
  --debug-args          Debug input arguments
  --and-simplify        Create additional simplified tweet set.
                        Run `gathertweet simplify` manually for more control.
 search:
  <terms>  Search terms. Individual search terms are queried separately,
           but duplicated tweets are removed from the stored results.
           Each search term counts against the 15 minute rate limit of 180
           searches, which can be avoided by manually joining search terms
           into a single query. NOTE: Wrap queries with spaces in
           \'single quotes\': only use double quotes within single quotes.
  --type <type>         Type of search results: "recent", "mixed", or "popular"
                        [default: recent]
  --geocode <geocode>   Geographical limiter of the template
                        "latitude,longitude,radius"
  --since_id <since_id> Return results with an ID greather than (newer than) or
                        equal to since_id, automatically extracted from the
                        existing tweets <file>, if it exists, and ignored when
                        <max_id> is set. Use "none" for all available tweets,
                        or "last" for the maximum seen status_id in existing
                        tweets. [default: last]

 search and timeline:
  -n, --n <n>           Number of tweets to return [default: 18000]
  --include_rts         Logical indicating whether retweets should be included
  --max_id <max_id>     Return results with an ID less than (older than) or equal to max_id

 search:
  --type <type>         Type of search results: "recent", "mixed", or "popular". [default: recent]
  --geocode <geocode>   Geographical limiter of the template "latitude,longitude,radius"
  --since_id <since_id> Return results with an ID greather than (newer than) or equal to since_id,
                        automatically extracted from the existing tweets <file>, if it exists, and
                        ignored when <max_id> is set. Use "none" for all available tweets,
                        or "last" for the maximum seen status_id in existing tweets. [default: last]
  -n, --n <n>        Number of tweets to return [default: 18000]
  --include_rts      Logical indicating whether retweets should be included
                     (default is to exclude RTs)
  --max_id <max_id>  Return tweets with an ID less (older) than or equal to

 timeline:
  --home                If included, returns home-timeline instead of user-timeline.
  <users>  A list of users as user names, IDs, or a mixture of both,
           separated by spaces.
  --home   If included, returns home-timeline instead of user-timeline.

 simplify:
  --output <output>     Output file, default is input file with `_simplified` appended to name.
  <fields>  Tweet fields that should be included. By default includes:
            `status_id`, `created_at`, `user_id`, `screen_name`, `text`,
            `favorite_count`, `retweet_count`, `is_quote`, `hashtags`,
            `mentions_screen_name`, `profile_url`, `profile_image_url`,
            `media_url`, `urls_url`, `urls_expanded_url`.
  --output <output>  Output file, default is input file with `_simplified`
                     appended to name.
 ' -> doc

 library(docopt)
@@ -67,140 +73,76 @@ if (args$`--debug-args`) {
  exit()
 }

 library(gathertweet)
 collapse <- function(..., sep = ", ") paste(..., collapse = sep)
 do_gathertweet <- function() {
  library(gathertweet)
  collapse <- function(..., sep = ", ") paste(..., collapse = sep)

 # Which action was called?
 valid_actions <- c("search", "update", "simplify", "timeline")
 action <- names(Filter(isTRUE, args[valid_actions]))
 if (!length(action)) {
  log_fatal("Please specify a valid action: {collapse(valid_actions)}")
 }
  # Which action was called?
  valid_actions <- c("search", "update", "simplify", "timeline")
  action <- names(Filter(isTRUE, args[valid_actions]))
  if (!length(action)) {
    log_fatal("Please specify a valid action: {collapse(valid_actions)}")
  }

 if (args$polite) {
  lockfile <- paste0(".gathertweet_",
                     digest::digest(args[c("file", "search", "update", "simplify")]),
                     ".lock")
  lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0)
  gathertweet:::stopifnot_locked(lck, "Another gathertweet {action} process is currently running for {args$file}")
 }
  if (args$polite) {
    lockfile <- paste0(
      ".gathertweet_",
      digest::digest(args[c("file", "search", "update", "simplify")]),
      ".lock"
    )
    lck <- filelock::lock(lockfile, exclusive = TRUE, timeout = 0)
    gathertweet:::stopifnot_locked(
      lck,
      "Another gathertweet {action} process is currently running for {args$file}"
    )
    on.exit({
      filelock::unlock(lck)
      unlink(lockfile)
    })
  }

 log_info("---- gathertweet {action} start ----")
  log_info("---- gathertweet {action} start ----")

  if (isTRUE(args$backup)) {
    backup_tweets(args$file, backup_dir = args[["backup-dir"]])
  }

 # Search ------------------------------------------------------------------
 if (isTRUE(args$search)) {
  # Also simplify if --and-simplify flag is called
  if (args[["--and-simplify"]]) args$simplify <- TRUE

  log_info("Searching for \"{paste0(args$terms, collapse = '\", \"')}\"")
  tweets <-
    # Search ----
  if (isTRUE(args$search)) {

  max_id <- args[["max_id"]]
  since_id <- args[["since_id"]]
  since_id <- if (is.null(max_id)) {
    if (since_id == "last") {
      last_seen_tweet(file = args$file)
    } else if (since_id == "none") {
      NULL
    } else since_id
  }
  if (!is.null(since_id)) log_info("Tweets from {since_id}")
  if (!is.null(max_id)) log_info("Tweets up to {max_id}")

  tweets <- lapply(
    args$term,
    function(term) rtweet::search_tweets(
      q = term,
      n = as.integer(args$n),
      type = args$type,
      include_rts = args$include_rts,
      geocode = args$geocode,
      max_id = max_id,
      parse = !args[["no-parse"]],
      token = args$token,
      retryonratelimit = args$retryonratelimit,
      verbose = !args$quiet,
      since_id = since_id
    )
  )
    do.call("gathertweet_search", args)

  tweets <- dplyr::bind_rows(tweets)
    # Update ----
  } else if (isTRUE(args$update)) {

  if (nrow(tweets) == 0) {
    log_info("No new tweets.")
    exit()
  }
    do.call("gathertweet_update", args)

  tweets <- tweets[!duplicated(tweets$status_id), ]
  tweets <- tweets[order(tweets$status_id), ]

  log_info("Gathered {nrow(tweets)} tweets")
  if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]])
  tweets <- save_tweets(tweets, args$file)

  log_info("Total of {nrow(tweets)} tweets in {args$file}")

 # Update ------------------------------------------------------------------
 } else if (isTRUE(args$update)) {
  logger("Updating tweets in {args$file}")
  tweets <- update_tweets(
    file = args$file,
    # passed to rtweet::lookup_statuses()
    parse = !args[["no-parse"]],
    token = args$token
  )
  log_debug("Status lookup returned {nrow(tweets)} tweets")
  if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]])
  tweets <- save_tweets(tweets, args$file)
  log_debug("Total of {nrow(tweets)} tweets in {args$file}")

 } else if (isTRUE(args$timeline)) {
  if (!length(args$users)) {
    stop("Please provide a list of users as user names, user IDs, or a mixture of both.")
  }
    # Timeline ----
  } else if (isTRUE(args$timeline)) {
    if (!length(args$users)) {
      stop("Please provide a list of users as user names, user IDs, ",
           "or a mixture of both.")
    }

  log_info("Gathering tweets by {collapse(args$users)}")
  if (args[["--and-simplify"]]) args$simplify <- TRUE
    do.call("gathertweet_timeline", args)
  }

  tweets <- rtweet::get_timeline(
    user = args[["users"]],
    n = min(as.integer(args[["n"]]), 3200),
    max_id = args[["max_id"]],
    home = isTRUE(args[["home"]]),
    parse = isFALSE(args[["no-parse"]]),
    check = TRUE,
    token = args$token,
    include_rts = isTRUE(args[["include-rts"]])
  )

  tweets <- tweets[!duplicated(tweets$status_id), ]
  tweets <- tweets[order(tweets$status_id), ]

  log_info("Gathered {nrow(tweets)} tweets from {length(args$users)} users")
  if (args$backup) backup_tweets(args$file, backup_dir = args[["backup-dir"]])
  tweets <- save_tweets(tweets, args$file)

  log_info("Total of {nrow(tweets)} tweets in {args$file}")
 }

  # Simplify ----------------------------------------------------------------
  if (isTRUE(args$simplify)) {
    do.call("gathertweet_simplify", args)
  }

 # Simplify ----------------------------------------------------------------
 if (isTRUE(args$simplify)) {
  logger("Simplifying tweets in {args$file}")
  tweets_simplified <- simplify_tweets(
    tweets = NULL,
    file = args$file,
    .fields = args$fields
  )
  log_debug("Simplified {nrow(tweets_simplified)} tweets")
  if (is.null(args$output)) {
    args$output <- gathertweet:::path_add(args$file, append = "_simplified")
  if (args$polite) {
    filelock::unlock(lck)
    unlink(lockfile)
  }
  log_info("Saving simplified tweets to {args$output}")
  tweets_simplfied <- save_tweets(tweets_simplified, args$output)
 }

 if (args$polite) {
  filelock::unlock(lck)
  unlink(lockfile)
  log_info("---- gathertweet {action} complete ----")
 }

 log_info("---- gathertweet {action} complete ----")
 do_gathertweet()
--- a/man/gathertweet_search.Rd
+++ b/man/gathertweet_search.Rd
@@ -0,0 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/gathertweet_actions.R
 \name{gathertweet_search}
 \alias{gathertweet_search}
 \title{gathertweet actions}
 \usage{
 gathertweet_search(terms, file = "tweets.rds", n = 18000,
  max_id = NULL, since_id = "last", type = "recent",
  include_rts = FALSE, geocode = NULL, `no-parse` = FALSE,
  token = NULL, retryonratelimit = FALSE, quiet = FALSE, ...)
 }
 \description{
 gathertweet actions
 }