Skip to content

Commit

Permalink
Add standalone bson reader
Browse files Browse the repository at this point in the history
  • Loading branch information
jeroen committed Dec 2, 2024
1 parent cdb88dd commit 4bc36d6
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 2 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ export(gridfs)
export(mongo)
export(mongo_options)
export(oid_to_timestamp)
export(read_bson)
export(ssl_options)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(jsonlite,validate)
useDynLib(mongolite,R_bigint_as_char)
useDynLib(mongolite,R_bson_reader_new)
useDynLib(mongolite,R_bson_reader_read)
useDynLib(mongolite,R_bson_to_json)
useDynLib(mongolite,R_bson_to_list)
useDynLib(mongolite,R_bson_to_raw)
Expand Down
41 changes: 41 additions & 0 deletions R/reader.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Standalone BSON reader
#'
#' Utility to parse BSON files into R without using MongoDB. This is useful
#' to read data from a `mongoexport` dump without needing a local mongodb server.
#' This reads all data in memory. To import a bson dump into your local mongodb
#' server, use the [mongo$import][mongo] function instead.
#'
#' @export
#' @useDynLib mongolite R_bson_reader_new R_bson_reader_read
#' @param con either a path to a file, a url, or a connection object
#' @param as_json return data as json strings instead of R lists
#' @param verbose print some output as we read
#' @examples
#' diamonds <- read_bson("http://jeroen.github.io/data/diamonds.bson")
read_bson <- function(con, as_json = FALSE, verbose = TRUE){
  # A character input is either a http(s) URL or a path to a local file;
  # anything else has to be a connection object already.
  if(length(con) && is.character(con)){
    con <- if(grepl("^https?://", con)){
      url(con)
    } else {
      file(normalizePath(con, mustWork = TRUE), raw = TRUE)
    }
  }
  stopifnot(inherits(con, 'connection'))

  # open() fails on a connection that is already open, so only open (and later
  # close) connections that are not open yet. A connection the caller opened
  # is read as-is and left open for the caller to manage.
  if(!isOpen(con)){
    # Register the cleanup first so the connection is released even when
    # open() itself fails (e.g. unreachable url).
    on.exit(close(con))
    open(con, 'rb')
  }

  reader <- .Call(R_bson_reader_new, con)

  # Documents are collected in an environment under zero-padded keys, so that
  # as.list(sorted = TRUE) returns them in the order they were read.
  output <- new.env(parent = emptyenv())
  i <- 0
  repeat {
    obj <- .Call(R_bson_reader_read, reader, as_json)
    # The reader returns NULL at end of input. Test for NULL explicitly rather
    # than for zero length: a document without fields may convert to a
    # zero-length list, which must not be mistaken for the end of the stream.
    if(is.null(obj))
      break
    i <- i+1
    if(isTRUE(verbose))
      cat("\rRead", i, file = stderr())
    output[[sprintf('%09d', i)]] <- obj
  }
  if(isTRUE(verbose))
    cat("\rDone!\n", file = stderr())
  unname(as.list(output, sorted = TRUE))
}
24 changes: 24 additions & 0 deletions man/read_bson.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/mongolite.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ SEXP client2r(mongoc_client_t *client);
SEXP gridfs2r(mongoc_gridfs_t *fs, SEXP prot);
void mongolite_log_handler (mongoc_log_level_t log_level, const char *log_domain, const char *message, void *user_data);
SEXP ConvertObject(bson_iter_t* iter, bson_iter_t* counter);
SEXP bson2list(bson_t *b);
SEXP bson2list(const bson_t *b);
SEXP bson_to_str(const bson_t * b);
29 changes: 29 additions & 0 deletions src/reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,32 @@ SEXP R_mongo_restore(SEXP con, SEXP ptr_col, SEXP verb) {
mongoc_bulk_operation_destroy (bulk);
return Rf_ScalarInteger(count);
}

/*
 * Finalizer for the external pointer that wraps a bson_reader_t.
 *
 * Does nothing when the pointer address is already NULL. Otherwise the
 * reader is destroyed, the object held in the pointer's protected slot is
 * released, and the address is cleared so that a second invocation (or a
 * later read attempt) sees a NULL reader.
 */
static void fin_bson_reader(SEXP ptr){
  bson_reader_t *reader = R_ExternalPtrAddr(ptr);
  if(reader != NULL){
    bson_reader_destroy(reader);
    R_SetExternalPtrProtected(ptr, R_NilValue);
    R_ClearExternalPtr(ptr);
  }
}

/*
 * Create a bson reader that pulls its data from the R connection 'con'.
 *
 * Returns an external pointer of class "bson_reader". The connection is
 * stored in the pointer's protected slot, so it stays referenced for as long
 * as the pointer has not been finalized.
 */
SEXP R_bson_reader_new(SEXP con) {
  bson_reader_t *handle = bson_reader_new_from_handle(con, bson_reader_feed, bson_reader_finalize);
  SEXP out = PROTECT(R_MakeExternalPtr(handle, R_NilValue, con));

  /* Register the finalizer before any further allocation; the final
   * argument (1) asks R to also run it when the session exits. */
  R_RegisterCFinalizerEx(out, fin_bson_reader, 1);

  SEXP klass = PROTECT(Rf_mkString("bson_reader"));
  Rf_setAttrib(out, R_ClassSymbol, klass);
  UNPROTECT(2);
  return out;
}

/*
 * Read the next document from a reader created by R_bson_reader_new.
 *
 * ptr:     external pointer holding the bson_reader_t.
 * as_json: when true the document is returned via bson_to_str(), otherwise
 *          via bson2list().
 *
 * Returns R_NilValue once the reader reports end of input. Raises an R error
 * if the reader was already destroyed, or if no document could be read
 * without end of input having been reached.
 */
SEXP R_bson_reader_read(SEXP ptr, SEXP as_json){
  bson_reader_t *reader = R_ExternalPtrAddr(ptr);
  if(reader == NULL)
    Rf_error("This reader has been destroyed.");

  bool at_end = false;
  const bson_t *next = bson_reader_read(reader, &at_end);

  /* The end-of-input flag is checked first: it distinguishes a clean end of
   * the stream from a failed read, which is reported below. */
  if(at_end)
    return R_NilValue;
  if(!next)
    Rf_error("Failed to read all documents");

  if(Rf_asLogical(as_json))
    return bson_to_str(next);
  return bson2list(next);
}
2 changes: 1 addition & 1 deletion src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ SEXP mkRaw(const unsigned char *buf, int len){
return out;
}

SEXP bson2list(bson_t *b){
SEXP bson2list(const bson_t *b){
bson_iter_t iter1;
bson_iter_t iter2;
bson_iter_init(&iter1, b);
Expand Down

0 comments on commit 4bc36d6

Please sign in to comment.