Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get_bucket() works around the API limit of 1000 objects #104

Merged
merged 4 commits into from
Jan 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ Authors@R: c(person("Thomas J.", "Leeper", role = c("aut", "cre"),
person("Mark", "Thompson", role = "ctb"),
person("Tyler", "Hunt", role = "ctb"),
person("Steven", "Akins", role = "ctb"),
person("Bao", "Nguyen", role = "ctb"))
person("Bao", "Nguyen", role = "ctb"),
person("Thierry", "Onkelinx", role = "ctb"))
Description: A simple client package for the Amazon Web Services (AWS) Simple
Storage Service (S3) REST API.
License: GPL (>= 2)
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ importFrom(httr,warn_for_status)
importFrom(tools,md5sum)
importFrom(utils,URLencode)
importFrom(utils,str)
importFrom(utils,tail)
importFrom(xml2,as_list)
importFrom(xml2,read_xml)
importFrom(xml2,write_xml)
Expand Down
89 changes: 62 additions & 27 deletions R/get_bucket.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @template bucket
#' @param prefix Character string that limits the response to keys that begin with the specified prefix
#' @param delimiter Character string used to group keys. Read the AWS doc for more detail.
#' @param max Integer indicating the maximum number of keys to return (max 1000).
#' @param max Integer indicating the maximum number of keys to return. The function will recursively access the bucket in case \code{max > 1000}. Use \code{max = Inf} to retrieve all objects.
#' @param marker Character string that specifies the key to start with when listing objects in a bucket. Amazon S3 returns object keys in alphabetical order, starting with the key that follows the marker.
#' @param parse_response logical, should we attempt to parse the response?
#' @template dots
Expand All @@ -20,48 +20,83 @@
#' @references \href{https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGET.html}{API Documentation}
#' @seealso \code{\link{bucketlist}}, \code{\link{get_object}}
#' @export
#' @importFrom utils tail
get_bucket <- function(bucket,
                       prefix = NULL,
                       delimiter = NULL,
                       max = NULL,
                       marker = NULL,
                       parse_response = TRUE,
                       ...) {

    # S3 returns at most 1000 keys per request. Ask for min(1000, max) and,
    # when the caller wants more, fetch further pages in the loop below.
    if (is.null(max)) {
        query <- list(prefix = prefix, delimiter = delimiter,
                      "max-keys" = NULL, marker = marker)
    } else {
        query <- list(prefix = prefix, delimiter = delimiter,
                      "max-keys" = min(1000, max), marker = marker)
    }
    r <- s3HTTP(verb = "GET", bucket = bucket, query = query,
                parse_response = parse_response, ...)

    if (isTRUE(parse_response)) {
        # Page through the listing until S3 reports the result is no longer
        # truncated, or until `max` keys have been requested in total.
        # `MaxKeys` is used as a running count of keys requested so far.
        # identical() guards against a missing/NULL IsTruncated field, which
        # would make `==` yield logical(0) inside `&&`.
        while (
            identical(r$IsTruncated, "true") &&
            !is.null(max) &&
            as.integer(r$MaxKeys) < max
        ) {
            query <- list(
                prefix = prefix,
                delimiter = delimiter,
                "max-keys" = min(max - as.integer(r$MaxKeys), 1000),
                # NOTE(review): assumes the last element of the parsed
                # response is a "Contents" record; its Key is the marker
                # from which the next page starts.
                marker = tail(r, 1)$Contents$Key
            )
            extra <- s3HTTP(verb = "GET", bucket = bucket, query = query,
                            parse_response = parse_response, ...)
            # Drop the five leading header fields of the new page
            # (Name, Prefix, Marker, MaxKeys, IsTruncated) and append
            # the remaining (Contents) records to the accumulated result.
            new_r <- c(r, tail(extra, -5))
            new_r$MaxKeys <- as.character(as.integer(r$MaxKeys) +
                                              as.integer(extra$MaxKeys))
            new_r$IsTruncated <- extra$IsTruncated
            # c() discards attributes, so carry over the HTTP response
            # attributes of the first page.
            for (a in c("x-amz-id-2", "x-amz-request-id", "date",
                        "x-amz-bucket-region", "content-type",
                        "transfer-encoding", "server")) {
                attr(new_r, a) <- attr(r, a)
            }
            r <- new_r
        }
    }

    # Unparsed responses and AWS errors are returned as-is.
    if (!isTRUE(parse_response)) {
        return(r)
    }
    if (inherits(r, "aws_error")) {
        return(r)
    }

    # Tag each object record with its bucket, coerce Size to numeric, and
    # class it as an s3_object.
    for (i in which(names(r) == "Contents")) {
        r[[i]][["Bucket"]] <- get_bucketname(bucket)
        r[[i]][["Size"]] <- as.numeric(r[[i]][["Size"]])
        attr(r[[i]], "class") <- "s3_object"
    }
    # Move non-Contents metadata out of the list and into attributes of the
    # returned s3_bucket object.
    att <- r[names(r) != "Contents"]
    r[names(r) != "Contents"] <- NULL
    out <- structure(r, class = "s3_bucket")
    attributes(out) <- c(attributes(out), att)
    out
}

#' @rdname get_bucket
#' @export
get_bucket_df <-
function(bucket,
prefix = NULL,
get_bucket_df <-
function(bucket,
prefix = NULL,
delimiter = NULL,
max = NULL,
marker = NULL,
marker = NULL,
...) {
r <- get_bucket(bucket = bucket, prefix = prefix, delimiter = delimiter,

r <- get_bucket(bucket = bucket, prefix = prefix, delimiter = delimiter,
max = max, marker = marker, parse_response = TRUE, ...)

if (length(r)) {
out <- lapply(r, function(x) {
c(Key = x[["Key"]],
Expand Down Expand Up @@ -105,7 +140,7 @@ function(bucket,
#' @references \href{http://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadListMPUpload.html}{API Documentation}
#' @export
get_uploads <- function(bucket, ...){
r <- s3HTTP(verb = "GET",
r <- s3HTTP(verb = "GET",
bucket = bucket,
query = list(uploads = ""),
...)
Expand Down
2 changes: 1 addition & 1 deletion man/get_bucket.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.