From 7a26724414f2cee3eafd0f0642dddacdc725ceb8 Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Thu, 22 Jun 2023 13:36:03 +0200 Subject: [PATCH 1/7] added tranpose functionality, fixed linting --- tools/waveica/waveica_wrapper.R | 82 +++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/tools/waveica/waveica_wrapper.R b/tools/waveica/waveica_wrapper.R index 6167bb10..f4037176 100644 --- a/tools/waveica/waveica_wrapper.R +++ b/tools/waveica/waveica_wrapper.R @@ -1,6 +1,14 @@ -read_file <- function(file, metadata, ft_ext, mt_ext) { +read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { data <- read_data(file, ft_ext) + if (transpose) { + col_names <- c("sampleName", data[[1]]) + t_data <- data[-1] + t_data <- t(t_data) + data <- data.frame(rownames(t_data), t_data) + colnames(data) <- col_names + } + if (!is.na(metadata)) { mt_data <- read_data(metadata, mt_ext) data <- merge(mt_data, data, by = "sampleName") @@ -24,6 +32,7 @@ read_data <- function(file, ext) { waveica <- function(file, metadata = NA, ext, + transpose = FALSE, wavelet_filter, wavelet_length, k, @@ -31,17 +40,19 @@ waveica <- function(file, t2, alpha, exclude_blanks) { - # get input from the Galaxy, preprocess data ext <- strsplit(x = ext, split = "\\,")[[1]] ft_ext <- ext[1] mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) - required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") - verify_input_dataframe(data, required_columns) + required_columns <- c( + "sampleName", "class", "sampleType", + "injectionOrder", "batch" + ) + data <- verify_input_dataframe(data, required_columns) data <- sort_by_injection_order(data) @@ -76,28 +87,30 @@ waveica <- function(file, waveica_singlebatch <- function(file, metadata = NA, ext, + transpose = FALSE, wavelet_filter, wavelet_length, k, alpha, cutoff, exclude_blanks) { - # get input from the Galaxy, preprocess data ext <- strsplit(x = ext, split = "\\,")[[1]] ft_ext <- ext[1] mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") optional_columns <- c("batch") - verify_input_dataframe(data, required_columns) + + data <- verify_input_dataframe(data, required_columns) data <- sort_by_injection_order(data) - feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] + feature_columns <- colnames(data)[!colnames(data) %in% + c(required_columns, optional_columns)] features <- data[, feature_columns] injection_order <- data$injectionOrder @@ -112,7 +125,7 @@ waveica_singlebatch <- function(file, ) data[, feature_columns] <- features - + group <- enumerate_groups(as.character(data$sampleType)) # remove blanks from dataset if (exclude_blanks) { data <- exclude_group(data, group) @@ -142,9 +155,49 @@ verify_input_dataframe <- function(data, required_columns) { stop("Error: dataframe cannot contain NULL values! Make sure that your dataframe does not contain empty cells") } else if (!all(required_columns %in% colnames(data))) { - stop("Error: missing metadata! -Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", ")) + stop( + "Error: missing metadata! +Make sure that the following columns are present in your dataframe: ", + paste(required_columns, collapse = ", ") + ) } + + data <- verify_column_types(data, required_columns) + + return(data) +} + +verify_column_types <- function(data, required_columns) { + # Specify the column names and their expected types + column_types <- list( + "sampleName" = c("character", "factor"), + "class" = c("character", "factor"), + "sampleType" = c("character", "factor"), + "injectionOrder" = "integer", + "batch" = "integer" + ) + + column_types <- column_types[required_columns] + + for (col_name in names(data)) { + actual_type <- class(data[[col_name]]) + if (col_name %in% names(column_types)) { + expected_types <- column_types[[col_name]] + + if (!actual_type %in% expected_types) { + stop( + "Column ", col_name, " is of type ", actual_type, + " but expected type is ", + paste(expected_types, collapse = " or "), "\n" + ) + } + } else { + if (actual_type != "numeric") { + data[[col_name]] <- as.numeric(as.character(data[[col_name]])) + } + } + } + return(data) } @@ -187,7 +240,10 @@ store_data <- function(data, output, ext) { if (ext == "csv") { write.csv(data, file = output, row.names = FALSE, quote = FALSE) } else if (ext == "tsv") { - write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) + write.table(data, + file = output, sep = "\t", + row.names = FALSE, quote = FALSE + ) } else { arrow::write_parquet(data, sink = output) } From 4c7989854f5ddb833fc653e87c781e87c6a3c815 Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Thu, 22 Jun 2023 13:36:47 +0200 Subject: [PATCH 2/7] added transpose option, added test --- .../feature_table_transpose_version.csv | 5 ++ .../feature_table_transpose_version.parquet | Bin 0 -> 3723 bytes tools/waveica/waveica.xml | 48 +++++++++++++++--- 3 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 tools/waveica/test-data/feature_table_transpose_version.csv create mode 100644 tools/waveica/test-data/feature_table_transpose_version.parquet diff --git a/tools/waveica/test-data/feature_table_transpose_version.csv b/tools/waveica/test-data/feature_table_transpose_version.csv new file mode 100644 index 00000000..663ae992 --- /dev/null +++ b/tools/waveica/test-data/feature_table_transpose_version.csv @@ -0,0 +1,5 @@ +id,VT_160120_002,VT_160120_004,VT_160120_006,VT_160120_008,VT_160120_010 +M85T34,228520.06430737,90217.384387202,235656.75288383896,16622.9351783435,62385.0742465736 +M86T41,35646729.21543971,35735702.457215995,37021134.452711605,44302499.262606,44639738.0735709 +M86T518,2386896.97966461,2456290.69621518,8873450.40260241,2466946.89667101,2389372.85729467 +M86T539,1026645.83653468,1089246.46040563,837856.449608585,994979.069689685,954938.131337246 diff --git a/tools/waveica/test-data/feature_table_transpose_version.parquet b/tools/waveica/test-data/feature_table_transpose_version.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4bf36d05e69e57836a170d034a7e8ea826d1c8fe GIT binary patch literal 3723 zcmcgv&2Jk;6knS*b(gd?G;7&XBzkBU1nqXdcNUeyIB7y~)~U@VO?s&uCpB@MCO9ch zA`bik95`~QDug(|4RPYYfdi^IRh+7dQxBXt@)z)C)=A@KT}2VfYCRwC`~Bv)p#EmSu&KN0gnl^CDvS%;~WkBH~z{F&tRZz_FY$r+5{V^3I4e zGA^ul&V>p>~-H&$e>kB|7L~?>WBp5|L_47cYTrlC2z$2J58u*#98-z+B z1K|aJAb^R1gb{%y_aqgZG9gp4$|o%q^GPjmH<0M}r(Y&|ZXvph1;PZ~MTnT3f6Eqr z&$^FKxi7Lh>!UbG1>(_RAgUi~AOb|N?@`I9^a&!Fi_aG>l|GnDCfD3~@nIfVhw_u5lPtO5*VI*&#SEgTt@c!jr7~W6u4@l+O|Dr^4_} z#(8v554UDbhHz$Fcs?gmQb_|iX$lx%Oc~%ne2fK2#Vd)>zRD(uK19=9I=ace-H+s#g6Ys)5_8Ee9<^e*`^ zwm3HKKq_`e?6sBsHKz8!FN7glU^YfzR>n~*THe+B+bvMLd9xDsHg>D+8B&ezSHkE% zottd2iS~SDVWB=f8?`FcDBPK7&-7=IkCrN7&FJg^5B92ZXL<@PeazRJ>r>lzY8yM@ zByVceP z4x>0GSnE9Em|B~gceLLA9^zc+)bD}sUKq{v7WY&yJS?BWH-2BCKj1G#f9}=(cf$R= z5_t^9r7BB{b<|w%tgS58I|ieEOw}IXMt1P>Z4USvoZwWK^wBQqTEKb*6{S>pOOJDU zh>N7AOAj{Xa@`P3k?2C|KUF;^@Pc^tvj>`iI);AF_@6ZTz2oRtb(GUu|Gn0+^mX_j zY5nWR$sg(k^h2#LYpPsR6A+D3&A6`sytoNLrZ#Un5tw>oMl+cBJNID?qF~}B=L4(w y)@Gyo>BH6SYt7BgYmIJqqj%}E)$Z2X#)C`nW70#V4*c>+yw(pn%en=BL;nR5Zvv74 literal 0 HcmV?d00001 diff --git a/tools/waveica/waveica.xml b/tools/waveica/waveica.xml index c8a941de..ebd7304a 100644 --- a/tools/waveica/waveica.xml +++ b/tools/waveica/waveica.xml @@ -19,6 +19,7 @@ #if $input_num.input_choice == "2": metadata = "$input_num.input_metadata.metadata", ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + transpose = $input_num.transpose_feature_table, #else: ext = "$input_num.data.ext", #end if @@ -36,6 +37,7 @@ #if $input_num.input_choice == "2": metadata = "$input_num.input_metadata.metadata", ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + transpose = $input_num.transpose_feature_table, #else: ext = "$input_num.data.ext", #end if @@ -61,10 +63,12 @@ +
-
- + +
@@ -93,7 +97,7 @@ - + @@ -104,7 +108,7 @@ - + @@ -115,7 +119,7 @@ - + @@ -128,7 +132,7 @@ - + @@ -141,7 +145,7 @@ - + @@ -154,13 +158,41 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + +