-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #94 from surveilr/ani/elt-dev-transform
feat: ELT Feature using CTR, DFA, DCLP1 Datasets #29
- Loading branch information
Showing
19 changed files
with
5,778 additions
and
176 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
26 changes: 26 additions & 0 deletions
26
lib/service/diabetes-research-hub/dataset-prep-transformation/cgm-file-metadata-generator.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Path of the generated CGM file-metadata CSV; generateCgmMetadata() below writes to it. | ||
// The leading './' makes it relative to the directory the script is run from. | ||
const outputFilePath = './detrended-fluctation-analysis/S1/detrended-fluctation-analysis-csv-dataset/supporting-files/cgm_file_metadata.csv'; | ||
|
||
/**
 * Builds the CGM file-metadata CSV text (header plus one row per record).
 *
 * Row `i` (1-based) gets metadata id `MD-00i`, device name `Medtronic MiniMed`,
 * a quoted patient id `"i"`, file name `case i`, file format `csv` and study
 * id `DFA`; every other column is left empty.
 *
 * @param numRecords Number of data rows to emit; must be a non-negative integer.
 * @returns The CSV text, lines separated by `\n`, without a trailing newline.
 * @throws RangeError when `numRecords` is negative, fractional or NaN.
 */
const buildCgmMetadataCsv = (numRecords: number): string => {
  if (!Number.isInteger(numRecords) || numRecords < 0) {
    throw new RangeError(`numRecords must be a non-negative integer, got ${numRecords}`);
  }

  const lines = [
    'metadata_id,devicename,device_id,source_platform,patient_id,file_name,file_format,file_upload_date,data_start_date,data_end_date,study_id',
  ];
  for (let i = 1; i <= numRecords; i++) {
    const metadataId = `MD-${String(i).padStart(3, '0')}`; // MD-001, MD-002, ...
    lines.push(`${metadataId},Medtronic MiniMed,,,"${i}",case ${i},csv,,,,DFA`);
  }
  return lines.join('\n');
};

/**
 * Generates the CGM file-metadata CSV and writes it to `outputFilePath`.
 *
 * The parent directory of `outputFilePath` is created first, because
 * `Deno.writeTextFile` does not create missing directories.
 *
 * @param numRecords Number of metadata rows to generate.
 */
const generateCgmMetadata = async (numRecords: number) => {
  const csvContent = buildCgmMetadataCsv(numRecords);

  // Make sure the target directory exists before writing.
  const lastSlash = outputFilePath.lastIndexOf('/');
  if (lastSlash > 0) {
    await Deno.mkdir(outputFilePath.slice(0, lastSlash), { recursive: true });
  }

  await Deno.writeTextFile(outputFilePath, csvContent);
  console.log(`CSV file generated at: ${outputFilePath}`);
};
|
||
// Script entry point: write the metadata CSV with 209 rows (patient ids 1..209, file names "case 1".."case 209"). | ||
// NOTE(review): 209 presumably matches the number of case files in the DFA dataset — confirm. | ||
await generateCgmMetadata(209); |
61 changes: 61 additions & 0 deletions
61
lib/service/diabetes-research-hub/dataset-prep-transformation/clean-csv.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import * as fs from 'node:fs'; | ||
import * as path from 'node:path'; | ||
import { copySync, ensureDirSync } from "https://deno.land/std@0.203.0/fs/mod.ts"; // Use Deno's fs utilities | ||
import { parse } from "https://deno.land/std@0.203.0/csv/mod.ts"; // Deno CSV parser | ||
|
||
/** Quotes a CSV field when it contains a comma, double quote or line break (inner quotes are doubled). */
function toCsvField(value: string): string {
  return /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
}

/**
 * Removes every column whose header cell (first row) starts with 'Unnamed'.
 *
 * @param rows Parsed CSV rows; the first row is treated as the header.
 * @returns New rows (header included) containing only the kept columns.
 *          Cells missing from short rows are emitted as empty strings.
 */
function dropUnnamedColumns(rows: string[][]): string[][] {
  const header = rows[0];
  if (header === undefined) return [];

  const keptIndexes: number[] = [];
  header.forEach((name, index) => {
    if (!name.trim().startsWith('Unnamed')) keptIndexes.push(index);
  });
  return rows.map((row) => keptIndexes.map((index) => row[index] ?? ''));
}

/**
 * Cleans a CSV file by dropping its 'Unnamed…' columns and writes the result
 * to `outputFolder` under the same base name.
 *
 * The file is parsed as plain rows (arrays of cells) so the header row is kept
 * in the output and columns can be selected by header name. Failures are
 * logged, not rethrown, so one bad file does not stop a batch.
 *
 * @param filePath Path of the CSV file to read.
 * @param outputFolder Folder that receives the cleaned file.
 */
async function cleanCsv(filePath: string, outputFolder: string): Promise<void> {
  try {
    const fileContent = await Deno.readTextFile(filePath);
    const rows: string[][] = parse(fileContent);
    const cleanedRows = dropUnnamedColumns(rows);

    const newFilePath = path.join(outputFolder, path.basename(filePath));
    const csvContent = cleanedRows
      .map((row) => row.map(toCsvField).join(','))
      .join('\n');

    await Deno.writeTextFile(newFilePath, csvContent);
    console.log(`Successfully cleaned and moved '${filePath}' to '${newFilePath}'`);
  } catch (error) {
    // Caught values are `unknown`; only Error instances are guaranteed to have `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error processing file '${filePath}': ${reason}`);
  }
}
|
||
/**
 * Copies a file into `outputFolder` under a name with every space replaced by
 * an underscore. Despite the name, the source file is left in place.
 *
 * An existing destination file is overwritten so the script can be re-run;
 * without `overwrite`, `copySync` throws when the destination already exists.
 *
 * @param filePath Path of the file to copy.
 * @param outputFolder Folder that receives the copy.
 * @returns Path of the copied file.
 */
function renameFileWithNoSpaces(filePath: string, outputFolder: string): string {
  const filename = path.basename(filePath).replace(/ /g, '_');
  const newFilePath = path.join(outputFolder, filename);
  copySync(filePath, newFilePath, { overwrite: true });

  console.log(`Renamed and moved file from '${filePath}' to '${newFilePath}'`);
  return newFilePath;
}
|
||
/**
 * Processes every `.csv` file found directly inside `inputFolder`: each file is
 * first copied into the output subfolder under a space-free name, then the
 * copy is cleaned in place.
 *
 * @param inputFolder Folder scanned for CSV files (not recursive).
 * @param outputSubfolderName Name of the subfolder, created inside `inputFolder`, that receives the results.
 */
async function processFilesInFolder(inputFolder: string, outputSubfolderName: string = 'detrended-fluctation-analysis-csv-dataset') {
  const destinationDir = path.join(inputFolder, outputSubfolderName);
  ensureDirSync(destinationDir);

  for (const { isFile, name } of Deno.readDirSync(inputFolder)) {
    const isCsvFile = isFile && name.endsWith('.csv');
    if (!isCsvFile) continue;

    // Copy under the sanitized name first, then clean the copy.
    const copiedPath = renameFileWithNoSpaces(path.join(inputFolder, name), destinationDir);
    await cleanCsv(copiedPath, destinationDir);
  }
}
|
||
// Script entry point: process every .csv file directly inside this folder. | ||
// No subfolder name is passed, so results go to the default 'detrended-fluctation-analysis-csv-dataset' subfolder. | ||
const inputFolder = './detrended-fluctation-analysis/S1'; // Replace with your folder path | ||
await processFilesInFolder(inputFolder); |
40 changes: 40 additions & 0 deletions
40
...diabetes-research-hub/dataset-prep-transformation/text-to-csv-converter-commadelimiter.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import { ensureDirSync } from "https://deno.land/std/fs/mod.ts"; | ||
import * as path from "https://deno.land/std/path/mod.ts"; | ||
import Papa from "https://esm.sh/papaparse"; | ||
|
||
// Folder scanned for .txt input files, and the folder that receives the converted .csv files. | ||
const folderPath = './ctr-anderson'; | ||
const outputFolder = './ctr-anderson/ctr-anderson-with-comma'; | ||
|
||
// Create the output folder at module load, before any conversion writes into it. | ||
ensureDirSync(outputFolder); | ||
|
||
/**
 * Converts one pipe-delimited text file into a comma-delimited CSV file.
 *
 * Despite the "Space" in its name, the function splits on `|`. The output is
 * produced with `Papa.unparse`, so fields containing commas, quotes or line
 * breaks are quoted instead of shifting columns.
 *
 * @param filePath Path of the pipe-delimited input file.
 * @param newFilePath Path of the CSV file to write.
 */
async function convertTxtToCsvSpace(filePath: string, newFilePath: string) {
  const fileContent = await Deno.readTextFile(filePath);

  // Every line is treated as data: no header row is interpreted.
  const results = Papa.parse(fileContent, {
    delimiter: "|",
    skipEmptyLines: true,
    header: false,
  });

  // Report parser problems (e.g. malformed quotes) rather than ignoring them.
  if (results.errors.length > 0) {
    console.warn(`${path.basename(filePath)}: ${results.errors.length} parse issue(s); output may be incomplete`);
  }

  // With `header: false` Papa returns each row as an array of cell strings.
  const rows = results.data as string[][];

  // Keep "\n" as the row separator (Papa's default is "\r\n").
  const csvContent = Papa.unparse(rows, { newline: "\n" });

  await Deno.writeTextFile(newFilePath, csvContent);
  console.log(`Converted ${path.basename(filePath)} to ${path.basename(newFilePath)}`);
}
|
||
// Convert every .txt file found directly inside folderPath into a .csv file in outputFolder.
for await (const entry of Deno.readDir(folderPath)) {
  if (!entry.isFile || !entry.name.endsWith('.txt')) continue;

  const filePath = path.join(folderPath, entry.name);
  // Swap only the trailing extension; String.replace would hit the first
  // '.txt' in the name ('a.txt.v2.txt' -> 'a.csv.v2.txt').
  const newFileName = `${entry.name.slice(0, -'.txt'.length)}.csv`;
  const newFilePath = path.join(outputFolder, newFileName);

  await convertTxtToCsvSpace(filePath, newFilePath);
}
47 changes: 47 additions & 0 deletions
47
lib/service/diabetes-research-hub/dataset-prep-transformation/txt-to-csv-spacetocomma.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import { ensureDirSync } from "https://deno.land/std/fs/mod.ts"; | ||
import * as path from "https://deno.land/std/path/mod.ts"; | ||
|
||
// Input and output locations, relative to the directory the script is run from. | ||
const inputFilePath = './detrended-fluctation-analysis/clinical_data.txt'; // Input text file path | ||
const outputFolder = './detrended-fluctation-analysis/S1/detrended-fluctation-analysis-csv-dataset'; // Output folder path | ||
const outputFilePath = path.join(outputFolder, 'clinical_data.csv'); // Output CSV file path | ||
|
||
// Create the output folder at module load, before the conversion writes into it. | ||
ensureDirSync(outputFolder); | ||
|
||
/**
 * Converts whitespace-delimited clinical data text into CSV lines.
 *
 * The first input line is discarded and replaced by a fixed header whose first
 * column is `pid`. Double quotes are stripped from every data line and the
 * remaining text is split on runs of whitespace. Both `\n` and `\r\n` line
 * endings are accepted, and surrounding whitespace is trimmed so it cannot
 * produce empty leading or trailing fields.
 *
 * @param text Full contents of the input text file.
 * @returns The CSV lines, header first.
 */
function clinicalTextToCsvLines(text: string): string[] {
  const headers = ['pid', 'gender', 'age', 'BMI', 'glycaemia', 'HbA1c', 'follow.up', 'T2DM'];
  const csvLines = [headers.join(',')];

  // slice(1) skips the input's own header line.
  for (const rawLine of text.split(/\r?\n/).slice(1)) {
    const line = rawLine.replace(/"/g, '').trim();
    if (line === '') continue; // Skip blank lines
    csvLines.push(line.split(/\s+/).join(','));
  }
  return csvLines;
}

/**
 * Reads the text file at `inputFilePath`, converts it to CSV and writes the
 * result to the module-level `outputFilePath`.
 *
 * @param inputFilePath Path of the whitespace-delimited input file.
 */
async function convertTextToCsvCommadelimiter(inputFilePath: string) {
  const text = await Deno.readTextFile(inputFilePath);
  const csvLines = clinicalTextToCsvLines(text);

  await Deno.writeTextFile(outputFilePath, csvLines.join('\n'));
  console.log(`Successfully converted to CSV: ${outputFilePath}`);
}
|
||
// Script entry point: convert the configured input text file and write the CSV to outputFilePath. | ||
await convertTextToCsvCommadelimiter(inputFilePath); |
Oops, something went wrong.