diff --git a/DESCRIPTION b/DESCRIPTION index bd7b802..3e3b534 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pkglite Title: Compact Package Representations -Version: 0.2.3.9000 +Version: 0.2.3.9001 Authors@R: c( person("Nan", "Xiao", email = "nan.xiao1@merck.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0250-5673")), diff --git a/NEWS.md b/NEWS.md index 59aba01..9d97284 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# pkglite 0.2.3.9000 +# pkglite 0.2.3.9001 ## Enhancements @@ -6,6 +6,8 @@ This helps identifying the correct file collections for R packages with `src/Makevars` or `src/Makefile`, for example, packages that interface with Stan via the rstan package (#56). +- Expand the binary file extension dictionary to cover files + frequently used in machine learning frameworks (#57). # pkglite 0.2.3 diff --git a/R/dictionary.R b/R/dictionary.R index 8855c88..2d2a1de 100644 --- a/R/dictionary.R +++ b/R/dictionary.R @@ -122,8 +122,9 @@ ext_binary <- function(flat = FALSE) { "audio" = c("wav", "mp3", "mid", "ogg", "au", "m4a"), "video" = c("mp4", "avi", "mov", "mkv", "webm"), "generic" = c( - "bin", "epub", "h5", "hdf5", "onnx", - "parquet", "feather", "pkl", "npy" + "bin", "epub", "hdf5", "h5", "parquet", "feather", "msgpack", + "pickle", "pkl", "npy", "npz", "safetensors", + "pt", "pth", "keras", "tfrecord", "pb", "ckpt", "onnx" ) ) if (flat) unique(unlist(x)) else x diff --git a/tests/testthat/test-independent-test_dictionary.R b/tests/testthat/test-independent-test_dictionary.R index a699ad5..32feccc 100644 --- a/tests/testthat/test-independent-test_dictionary.R +++ b/tests/testthat/test-independent-test_dictionary.R @@ -77,8 +77,9 @@ test_that("Test ext_binary() generate the right string list", { "audio" = c("wav", "mp3", "mid", "ogg", "au", "m4a"), "video" = c("mp4", "avi", "mov", "mkv", "webm"), "generic" = c( - "bin", "epub", "h5", "hdf5", "onnx", - "parquet", "feather", "pkl", "npy" + "bin", "epub", "hdf5", "h5", "parquet", "feather", "msgpack", + "pickle", "pkl", "npy", "npz", "safetensors", + "pt", "pth", "keras", "tfrecord", "pb", "ckpt", "onnx" ) ) diff --git a/tests/testthat/test-independent-test_templates.R b/tests/testthat/test-independent-test_templates.R index 6a723c3..020ef11 100644 --- a/tests/testthat/test-independent-test_templates.R +++ b/tests/testthat/test-independent-test_templates.R @@ -187,7 +187,7 @@ test_that("file_vignettes() creates the correct 'file_spec' objects", { (!is_spec_binary & is_file_spec_type( fs_source = fs, path = "vignettes/", - pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$", + pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$", format = "binary", recursive = TRUE, ignore_case = TRUE, @@ -332,7 +332,7 @@ test_that("file_default() creates the correct 'file_spec' objects", { (!is_spec_binary & is_file_spec_type( fs_source = fs, path = "vignettes/", - pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$", + pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$", format = "binary", recursive = TRUE, ignore_case = TRUE, @@ -534,7 +534,7 @@ test_that("file_auto() creates the correct 'file_spec' objects", { (!is_spec_binary & is_file_spec_type( fs_source = fs, path = "inst/", - pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.h5$|\\.hdf5$|\\.onnx$|\\.parquet$|\\.feather$|\\.pkl$|\\.npy$", + pattern = "\\.rda$|\\.rds$|\\.RData$|\\.jpg$|\\.jpeg$|\\.pdf$|\\.png$|\\.bmp$|\\.gif$|\\.tif$|\\.tiff$|\\.emf$|\\.svgz$|\\.ico$|\\.webp$|\\.eps$|\\.ppm$|\\.pgm$|\\.pbm$|\\.pnm$|\\.xcf$|\\.psd$|\\.graffle$|\\.o$|\\.so$|\\.rdb$|\\.rdx$|\\.woff2$|\\.woff$|\\.otf$|\\.ttf$|\\.eot$|\\.docx$|\\.xlsx$|\\.pptx$|\\.xltx$|\\.potx$|\\.doc$|\\.xls$|\\.ppt$|\\.xlsb$|\\.xlsm$|\\.odt$|\\.ods$|\\.odp$|\\.odg$|\\.odc$|\\.odf$|\\.odi$|\\.odm$|\\.odb$|\\.sas7bdat$|\\.sas7bcat$|\\.xpt$|\\.xpt5$|\\.xpt8$|\\.zip$|\\.tar$|\\.gz$|\\.tgz$|\\.bz2$|\\.7z$|\\.xz$|\\.sqlite$|\\.sqlite3$|\\.dbf$|\\.accdb$|\\.mdb$|\\.pyc$|\\.jar$|\\.mo$|\\.shx$|\\.shp$|\\.laz$|\\.sbx$|\\.sbn$|\\.nc$|\\.gpkg$|\\.bam$|\\.bai$|\\.wav$|\\.mp3$|\\.mid$|\\.ogg$|\\.au$|\\.m4a$|\\.mp4$|\\.avi$|\\.mov$|\\.mkv$|\\.webm$|\\.bin$|\\.epub$|\\.hdf5$|\\.h5$|\\.parquet$|\\.feather$|\\.msgpack$|\\.pickle$|\\.pkl$|\\.npy$|\\.npz$|\\.safetensors$|\\.pt$|\\.pth$|\\.keras$|\\.tfrecord$|\\.pb$|\\.ckpt$|\\.onnx$", format = "binary", recursive = TRUE, ignore_case = TRUE,