-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwasm-icare.js
835 lines (782 loc) · 50.3 KB
/
wasm-icare.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
/**
* @module wasm-icare
*/
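/**
* Wrapper class to hold the Pyodide runtime in which the iCARE Python package is loaded and add web-specific
* functionalities to its methods.
* @class
* @property {object} pyodide - The Pyodide instance, set by 'initialize()'.
* @property {string} version - The version of Wasm-iCARE.
* @property {string} pyICareVersion - The version of the iCARE Python package to load from PyPI.
* @property {string} pyodideVersion - The version of Pyodide to load from the CDN.
* @property {string[]} preloadedFiles - The URLs of the files pre-loaded to the Pyodide file system.
*/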
class WasmICARE {
/**
* Constructor for the WasmICARE class. This class is not meant to be instantiated directly. Use the
* 'initialize()' method to instantiate this class.
*/
constructor() {
// Wasm-iCARE version.
this.version = '1.1.0';
// Version of the iCARE Python package to load from PyPI.
this.pyICareVersion = '1.0.0';
// Version of Pyodide to load from the CDN.
this.pyodideVersion = '0.23.2';
// Files that are pre-loaded to the Pyodide file system.
this.preloadedFiles = [];
}
/**
* Factory method to instantiate the WasmICARE class.
* @returns {Promise<WasmICARE>}
*/
static async initialize() {
const instance = new WasmICARE();
// instantiate Pyodide
const pyodideEsmUrl = 'https://cdn.jsdelivr.net/npm/pyodide@' + instance.pyodideVersion + '/+esm';
const pyodideRootUrl = 'https://cdn.jsdelivr.net/pyodide/v' + instance.pyodideVersion + '/full/';
instance.pyodide = await (await import(pyodideEsmUrl)).loadPyodide({indexURL: pyodideRootUrl});
// instantiate iCARE
await instance.pyodide.loadPackage('micropip');
const micropip = instance.pyodide.pyimport('micropip');
await micropip.install('pyicare=='.concat(instance.pyICareVersion));
instance.pyodide.runPython(`import icare`);
return instance;
}
_getFileNameOrNone(url) {
return url ? JSON.stringify(url.substring(url.lastIndexOf('/') + 1)) : 'None';
}
_valueOrNone(value) {
return value ? JSON.stringify(value) : 'None';
}
/**
* Method to load files from a list of URLs and write them to the Pyodide file system.
* @param fileURLs
* @returns {Promise<Awaited<unknown>[]>}
*/
async fetchFilesAndWriteToPyodideFS(fileURLs) {
if (!this.pyodide) {
throw new Error('Please instantiate this class using the WasmICARE.initialize() method.');
}
const fetchAndWriteFile = async (url) => {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to fetch file from ${url}`);
}
try {
const fileContent = await response.text();
const fileName = url.substring(url.lastIndexOf('/') + 1);
this.pyodide.FS.writeFile(fileName, fileContent);
return {isError: false, message: `File ${fileName} successfully loaded to the Pyodide file system.`};
} catch (error) {
throw new Error(`Error fetching and writing file: ${error.message}`);
}
}
return await Promise.all(fileURLs.map(fetchAndWriteFile));
}
/**
* Method to load files from a list of URLs and write them to the Pyodide file system. This utility allows you to
* save time from network latencies when making several calls to computing absolute risk. When files are loaded
* using this option, they will not be re-loaded when computing absolute risk.
* @param fileURLs
* @returns {Promise<Awaited<*>[]>}
*/
async preloadFiles(fileURLs) {
if (!this.pyodide) {
throw new Error('Please instantiate this class using the WasmICARE.initialize() method.');
}
const fileLoadAndWritePromises = await this.fetchFilesAndWriteToPyodideFS(fileURLs);
this.preloadedFiles = fileURLs;
return fileLoadAndWritePromises;
}
/**
* Method to convert the Wasm-iCARE output to JSON.
* @param obj
* @returns {{}|*}
*/
convertOutputToJSON(obj) {
if (obj instanceof Map) {
const result = {};
obj.forEach((value, key) => {
result[key] = this.convertOutputToJSON(value);
});
return result;
}
if (Array.isArray(obj)) {
return obj.map((item) => this.convertOutputToJSON(item));
}
return obj;
}
/**
* This method is used to build absolute risk models and apply them to estimate absolute risks.
* @async
* @function
* @param applyAgeStart
* Age(s) for the start of the interval, over which, to compute the absolute risk. If a single integer is provided,
* all instances in the profiles ('applyCovariateProfileUrl' and/or 'applySnpProfileUrl') are assigned this start
* age for the interval. If a different start age needs to be assigned for each instance, provide an array of ages
* as integers of the same length as the number of instances in these profiles.
* @param applyAgeIntervalLength
* Number of years over which to compute the absolute risk. That is to say that the age at the end of the interval
* is 'applyAgeStart' + 'applyAgeIntervalLength'. If a single integer is provided, all instances in the profiles
* ('applyCovariateProfileUrl' and/or 'applySnpProfileUrl') are assigned this interval length. If a different
* interval length needs to be assigned for each instance, provide an array of interval lengths as integers of the
* same length as the number of instances in these profiles.
* @param modelDiseaseIncidenceRatesUrl
* A URL to a CSV file containing the age-specific disease incidence rates for the population of interest. The data
* in the file must either contain two columns, named: ['age', 'rate'], to specify the incidence rates associated
* with each age group; or three columns, named: ['start_age', 'end_age', 'rate'], to specify the incidence rates
* associated with each age interval. The age ranges must fully cover the age intervals specified using parameters
* 'applyAgeStart' and 'applyAgeIntervalLength'.
* @param modelCompetingIncidenceRatesUrl
* A URL to a CSV file containing the age-specific incidence rates for competing events in the population of
* interest. The data in the file must either contain two columns, named: ['age', 'rate'], to specify the incidence
* rates associated with each age group; or three columns, named: ['start_age', 'end_age', 'rate'], to specify the
* incidence rates associated with each age interval. The age ranges must fully cover the age intervals specified
* using parameters 'applyAgeStart' and 'applyAgeIntervalLength'.
* @param modelCovariateFormulaUrl
* A URL to a text file containing a Patsy symbolic description string of the model to be fitted,
* e.g. Y ~ parity + family_history.
* Reference: https://patsy.readthedocs.io/en/latest/formulas.html#the-formula-language
* Please make sure that the variable name in your dataset is not from the namespace of the Python execution
* context, including Python standard library, numpy, pandas, patsy, and icare. For example, a variable name "C"
* and "Q" would conflict with Patsy built-in functions of the same name. Variable names with the R-style periods
* in them should be surrounded by the Patsy quote function Q(family.history). In Python, periods are used to
* access attributes of objects, so they are not allowed in Patsy variable names unless surrounded by Q(). Patsy
* language is similar to R's formula object (https://patsy.readthedocs.io/en/latest/R-comparison.html).
* @param modelLogRelativeRiskUrl
* A URL to a JSON file containing the log odds ratios, of the variables in the model except the intercept term, in
* association with the disease. The first-level JSON keys should correspond to the variable names generated by
* Patsy when building the design matrix. Their values should correspond to the log odds ratios of the variable's
* association with the disease.
* @param modelReferenceDatasetUrl
* A URL to a CSV file containing the reference dataset with risk factor distribution that is representative of
* the population of interest. No missing values are permitted in this dataset.
* @param modelReferenceDatasetWeightsVariableName
* A string specifying the name of the variable in the dataset at 'modelReferenceDatasetUrl' that indicates the
* sampling weight for each instance. If set to None (default), then a uniform weight will be assigned to each
* instance.
* @param modelSnpInfoUrl
* A URL to a CSV file containing the information about the SNPs in the model. The data should contain three
* columns, named: ['snp_name', 'snp_odds_ratio', 'snp_freq'] corresponding to the SNP ID, the odds ratio of the
* SNP in association with the disease, and the minor allele frequency, respectively.
* @param modelFamilyHistoryVariableName
* A string specifying the name of the binary variable (values: {0, 1}; missing values are permitted) in the model
* formula ('modelCovariateFormulaUrl') that represents the family history of the disease. This needs to be
* specified when using the special SNP model option so that the effect of family history can be adjusted for the
* presence of the SNPs.
* @param numImputations
* The number of imputations for handling missing SNPs.
* @param applyCovariateProfileUrl
* A URL to a CSV file containing the covariate (risk factor) profiles of the individuals for whom the absolute
* risk is to be computed. Missing values are permitted.
* @param applySnpProfileUrl
* A URL to a CSV file containing the SNP profiles (values: {0: homozygous reference alleles, 1: heterozygous,
* 2: homozygous alternate alleles}) of the individuals for whom the absolute risk is to be computed. Missing
* values are permitted.
* @param returnLinearPredictors
* Set true to return the calculated linear predictor values for each individual in the 'applyCovariateProfileUrl'
* and/or 'applySnpProfileUrl' datasets.
* @param returnReferenceRisks
* Set true to return the absolute risk estimates for each individual in the 'modelReferenceDatasetUrl' dataset.
* @param seed
* Fix a seed for reproducibility.
* @returns {Promise<{}|*>}
* An object with the following keys—
* 1) 'model':
* An object of feature names and the associated beta values that were used to compute the absolute risk
* estimates.
* 2) 'profile':
* A records-oriented JSON of the input profile data, the specified age intervals, and the calculated
* absolute risk estimates. If 'returnLinearPredictors' is set to true, they are also included as an
* additional column.
* 3) 'reference_risks':
* If 'returnReferenceRisks' is true, this key will be present in the returned object. It will contain an
* array of objects, one per unique combination of the specified age intervals, containing age at the start
* of interval ('age_interval_start'), age at the end of interval ('age_interval_end'), and a list of absolute
* risk estimates for the individuals in the reference dataset ('population_risks').
* 4) 'method':
* A string containing the name of the method used to calculate the absolute risk estimates. When this
* method is used, the method name is "iCARE - absolute risk".
*/
async computeAbsoluteRisk(
{
applyAgeStart,
applyAgeIntervalLength,
modelDiseaseIncidenceRatesUrl,
modelCompetingIncidenceRatesUrl,
modelCovariateFormulaUrl,
modelLogRelativeRiskUrl,
modelReferenceDatasetUrl,
modelReferenceDatasetWeightsVariableName,
modelSnpInfoUrl,
modelFamilyHistoryVariableName,
numImputations = 5,
applyCovariateProfileUrl,
applySnpProfileUrl,
returnLinearPredictors = false,
returnReferenceRisks = false,
seed = 1234,
}) {
if (!this.pyodide) {
throw new Error('Please instantiate this class using the WasmICARE.initialize() method.');
}
const fileURLs = [
modelDiseaseIncidenceRatesUrl,
modelCompetingIncidenceRatesUrl,
modelCovariateFormulaUrl,
modelLogRelativeRiskUrl,
modelReferenceDatasetUrl,
modelSnpInfoUrl,
applyCovariateProfileUrl,
applySnpProfileUrl,
].filter(url => url !== undefined).filter(url => !this.preloadedFiles.includes(url));
await this.fetchFilesAndWriteToPyodideFS(fileURLs);
applyAgeStart = this._valueOrNone(applyAgeStart);
applyAgeIntervalLength = this._valueOrNone(applyAgeIntervalLength);
modelDiseaseIncidenceRatesUrl = this._getFileNameOrNone(modelDiseaseIncidenceRatesUrl);
modelCompetingIncidenceRatesUrl = this._getFileNameOrNone(modelCompetingIncidenceRatesUrl);
modelCovariateFormulaUrl = this._getFileNameOrNone(modelCovariateFormulaUrl);
modelLogRelativeRiskUrl = this._getFileNameOrNone(modelLogRelativeRiskUrl);
modelReferenceDatasetUrl = this._getFileNameOrNone(modelReferenceDatasetUrl);
modelReferenceDatasetWeightsVariableName = this._valueOrNone(modelReferenceDatasetWeightsVariableName);
modelSnpInfoUrl = this._getFileNameOrNone(modelSnpInfoUrl);
modelFamilyHistoryVariableName = this._valueOrNone(modelFamilyHistoryVariableName);
numImputations = this._valueOrNone(numImputations);
applyCovariateProfileUrl = this._getFileNameOrNone(applyCovariateProfileUrl);
applySnpProfileUrl = this._getFileNameOrNone(applySnpProfileUrl);
returnLinearPredictors = returnLinearPredictors ? 'True' : 'False';
returnReferenceRisks = returnReferenceRisks ? 'True' : 'False';
seed = this._valueOrNone(seed);
let result = this.pyodide.runPython(`
result = icare.compute_absolute_risk(
apply_age_start = ${applyAgeStart},
apply_age_interval_length = ${applyAgeIntervalLength},
model_disease_incidence_rates_path = ${modelDiseaseIncidenceRatesUrl},
model_competing_incidence_rates_path = ${modelCompetingIncidenceRatesUrl},
model_covariate_formula_path = ${modelCovariateFormulaUrl},
model_log_relative_risk_path = ${modelLogRelativeRiskUrl},
model_reference_dataset_path = ${modelReferenceDatasetUrl},
model_reference_dataset_weights_variable_name = ${modelReferenceDatasetWeightsVariableName},
model_snp_info_path = ${modelSnpInfoUrl},
model_family_history_variable_name = ${modelFamilyHistoryVariableName},
num_imputations = ${numImputations},
apply_covariate_profile_path = ${applyCovariateProfileUrl},
apply_snp_profile_path = ${applySnpProfileUrl},
return_linear_predictors = ${returnLinearPredictors},
return_reference_risks = ${returnReferenceRisks},
seed = ${seed}
)
result
`).toJs();
if (result.isError) {
throw new Error(result.message);
}
result = this.convertOutputToJSON(result);
result['profile'] = JSON.parse(result['profile'])
return result;
}
/**
* This method is used to build an absolute risk model that incorporates different input parameters before and
* after a given time cut-point. The model is then applied to estimate the combined absolute risks.
* @async
* @function
* @param applyAgeStart
* Age(s) for the start of the interval, over which, to compute the absolute risk. If a single integer is provided,
* all instances in the profiles ('applyCovariateProfileCutpointUrl' and/or 'applySnpProfileUrl') are assigned this
* start age for the interval. If a different start age needs to be assigned for each instance, provide an array of
* ages as integers of the same length as the number of instances in these profiles. If an array is provided, the
* parameters 'applyAgeIntervalLength' and 'cutpoint' must also be arrays of the same length.
* @param applyAgeIntervalLength
* Number of years over which to compute the absolute risk. That is to say that the age at the end of the interval
* is 'applyAgeStart' + 'applyAgeIntervalLength'. If a single integer is provided, all instances in the profiles
* ('applyCovariateProfileCutpointUrl' and/or 'applySnpProfileUrl') are assigned this interval length. If a
* different interval length needs to be assigned for each instance, provide an array of interval lengths as
* integers of the same length as the number of instances in these profiles. If an array is provided, the
* parameters 'applyAgeStart' and 'cutpoint' must also be arrays of the same length.
* @param modelDiseaseIncidenceRatesUrl
* A URL to a CSV file containing the age-specific disease incidence rates for the population of interest. The data
* in the file must either contain two columns, named: ['age', 'rate'], to specify the incidence rates associated
* with each age group; or three columns, named: ['start_age', 'end_age', 'rate'], to specify the incidence rates
* associated with each age interval. The age ranges must fully cover the age intervals specified using parameters
* 'applyAgeStart' and 'applyAgeIntervalLength'.
* @param modelCompetingIncidenceRatesUrl
* A URL to a CSV file containing the age-specific incidence rates for competing events in the population of
* interest. The data in the file must either contain two columns, named: ['age', 'rate'], to specify the incidence
* rates associated with each age group; or three columns, named: ['start_age', 'end_age', 'rate'], to specify the
* incidence rates associated with each age interval. The age ranges must fully cover the age intervals specified
* using parameters 'applyAgeStart' and 'applyAgeIntervalLength'.
* @param modelCovariateFormulaBeforeCutpointUrl
* A URL to a text file containing the covariate formula for the model to be fit before the cut-point. The text
* should contain a string description of the covariate formula using the Patsy symbolic description language.
* Reference: https://patsy.readthedocs.io/en/latest/formulas.html#the-formula-language
* @param modelCovariateFormulaAfterCutpointUrl
* A URL to a text file containing the covariate formula for the model to be fit after the cut-point. The text
* should contain a string description of the covariate formula using the Patsy symbolic description language. If
* this value is undefined, the covariate formula before the cut-point is used.
* Reference: https://patsy.readthedocs.io/en/latest/formulas.html#the-formula-language
* @param modelLogRelativeRiskBeforeCutpointUrl
* A URL to a JSON file containing the log odds ratios, of the variables in the model except the intercept term, in
* association with the disease, for the model to be fit before the cut-point. The JSON file should contain an
* object with the variable names as keys and the log odds ratios as values.
* @param modelLogRelativeRiskAfterCutpointUrl
* A URL to a JSON file containing the log odds ratios, of the variables in the model except the intercept term,
* in association with the disease, for the model to be fit after the cut-point. The JSON file should contain an
* object with the variable names as keys and the log odds ratios as values. If this value is undefined, the
* log odds ratios before the cut-point are used.
* @param modelReferenceDatasetBeforeCutpointUrl
* A URL to a CSV file containing the reference dataset with risk factor distribution that is representative of
* the population of interest before the cut-point.
* @param modelReferenceDatasetAfterCutpointUrl
* A URL to a CSV file containing the reference dataset with risk factor distribution that is representative of
* the population of interest after the cut-point. If this value is undefined, the reference dataset before the
* cut-point is used.
* @param modelReferenceDatasetWeightsVariableNameBeforeCutpoint
* A string specifying the name of the variable in the dataset at 'modelReferenceDatasetBeforeCutpointUrl' that
* contains the sampling weights for each individual.
* @param modelReferenceDatasetWeightsVariableNameAfterCutpoint
* A string specifying the name of the variable in the dataset at 'modelReferenceDatasetAfterCutpointUrl' that
* contains the sampling weights for each individual. If this value is undefined, the weights variable name before
* the cut-point is used.
* @param modelSnpInfoUrl
* A URL to a CSV file containing the information about the SNPs in the model. The data should contain three
* columns, named: ['snp_name', 'snp_odds_ratio', 'snp_freq'] corresponding to the SNP ID, the odds ratio of the
* SNP in association with the disease, and the minor allele frequency, respectively.
* @param modelFamilyHistoryVariableNameBeforeCutpoint
* A string specifying the name of the binary variable (values: {0, 1}; missing values are permitted) in the
* dataset at 'modelReferenceDatasetBeforeCutpointUrl' that indicates whether the individual has a family history
* of the disease.
* @param modelFamilyHistoryVariableNameAfterCutpoint
* A string specifying the name of the binary variable (values: {0, 1}; missing values are permitted) in the
* dataset at 'modelReferenceDatasetAfterCutpointUrl' that indicates whether the individual has a
* family history of the disease. If this value is set to None, the family history variable name before the
* cut-point is used.
* @param applyCovariateProfileBeforeCutpointUrl
* A URL to a CSV file containing the covariate (risk factor) profiles of the individuals for whom the absolute
* risk is to be computed before the cut-point.
* @param applyCovariateProfileAfterCutpointUrl
* A URL to a CSV file containing the covariate (risk factor) profiles of the individuals for whom the absolute
* risk is to be computed after the cut-point. If this value is undefined, the covariate profile before the
* cut-point is used.
* @param applySnpProfileUrl
* A URL to a CSV file containing the SNP profiles (values: {0: homozygous reference alleles, 1: heterozygous,
* 2: homozygous alternate alleles}) of the individuals for whom the absolute risk is to be computed. Missing
* values are permitted.
* @param cutpoint
* Integer age using which the absolute risk computation is split into before and after the cut-point. If a single
* integer is provided, all instances in the profiles ('applyCovariateProfileUrl' and/or 'applySnpProfileUrl') are
* assigned this cut-point. If a different cut-point needs to be assigned for each instance, provide an array of
* cut-points as integers of the same length as the number of instances in these profiles. If an array is provided,
* the parameters 'applyAgeStart' and 'applyAgeIntervalLength' must also be arrays of the same length.
* @param numImputations
* The number of imputations for handling missing SNPs.
* @param returnLinearPredictors
* Set true to return the calculated linear predictor values for each individual in the 'applyCovariateProfileUrl'
* and/or 'applySnpProfileUrl' datasets.
* @param returnReferenceRisks
* Set true to return the absolute risk estimates for each individual in the 'modelReferenceDatasetUrl' dataset.
* @param seed
* Fix a seed for reproducibility.
* @returns {Promise<{}|*>}
* An object with the following keys—
* 1) 'model':
* An object containing the model parameters. It contains two further keys: 'before_cutpoint' and
* 'after_cutpoint', each of which contains the model parameters before and after the cut-point,
* respectively.
* 2) 'profile':
* A records-oriented JSON of the input profile data, the specified age intervals, cut-points, and the
* calculated absolute risk estimates. If 'returnLinearPredictors' is set to true, they are also included
* as an additional column.
* 3) 'reference_risks':
* If 'returnReferenceRisks' is True, this key will be present in the returned dictionary. It will contain
* two arrays of objects with keys 'before_cutpoint' and 'after_cutpoint', each of which contains the
* reference risks for before and after the cut-point datasets, respectively. Each of these arrays
* contains objects, one per unique combination of the specified age intervals, containing age at the start
* of interval ('age_interval_start'), age at the end of interval ('age_interval_end'), and a list of absolute
* risk estimates for the individuals in the reference dataset ('population_risks').
* 4) 'method':
* A string containing the name of the method used to calculate the absolute risk estimates. When this
* method is used, the method name is "iCARE - absolute risk with split intervals".
*/
async computeAbsoluteRiskSplitInterval(
{
applyAgeStart,
applyAgeIntervalLength,
modelDiseaseIncidenceRatesUrl,
modelCompetingIncidenceRatesUrl,
modelCovariateFormulaBeforeCutpointUrl,
modelCovariateFormulaAfterCutpointUrl,
modelLogRelativeRiskBeforeCutpointUrl,
modelLogRelativeRiskAfterCutpointUrl,
modelReferenceDatasetBeforeCutpointUrl,
modelReferenceDatasetAfterCutpointUrl,
modelReferenceDatasetWeightsVariableNameBeforeCutpoint,
modelReferenceDatasetWeightsVariableNameAfterCutpoint,
modelSnpInfoUrl,
modelFamilyHistoryVariableNameBeforeCutpoint,
modelFamilyHistoryVariableNameAfterCutpoint,
applyCovariateProfileBeforeCutpointUrl,
applyCovariateProfileAfterCutpointUrl,
applySnpProfileUrl,
cutpoint,
numImputations = 5,
returnLinearPredictors = false,
returnReferenceRisks = false,
seed = 1234,
}) {
if (!this.pyodide) {
throw new Error('Please instantiate this class using the WasmICARE.initialize() method.');
}
const fileURLs = [
modelDiseaseIncidenceRatesUrl,
modelCompetingIncidenceRatesUrl,
modelCovariateFormulaBeforeCutpointUrl,
modelCovariateFormulaAfterCutpointUrl,
modelLogRelativeRiskBeforeCutpointUrl,
modelLogRelativeRiskAfterCutpointUrl,
modelReferenceDatasetBeforeCutpointUrl,
modelReferenceDatasetAfterCutpointUrl,
modelSnpInfoUrl,
applyCovariateProfileBeforeCutpointUrl,
applyCovariateProfileAfterCutpointUrl,
applySnpProfileUrl,
].filter(url => url !== undefined).filter(url => !this.preloadedFiles.includes(url));
await this.fetchFilesAndWriteToPyodideFS(fileURLs);
applyAgeStart = this._valueOrNone(applyAgeStart);
applyAgeIntervalLength = this._valueOrNone(applyAgeIntervalLength);
modelDiseaseIncidenceRatesUrl = this._getFileNameOrNone(modelDiseaseIncidenceRatesUrl);
modelCompetingIncidenceRatesUrl = this._getFileNameOrNone(modelCompetingIncidenceRatesUrl);
modelCovariateFormulaBeforeCutpointUrl = this._getFileNameOrNone(modelCovariateFormulaBeforeCutpointUrl);
modelCovariateFormulaAfterCutpointUrl = this._getFileNameOrNone(modelCovariateFormulaAfterCutpointUrl);
modelLogRelativeRiskBeforeCutpointUrl = this._getFileNameOrNone(modelLogRelativeRiskBeforeCutpointUrl);
modelLogRelativeRiskAfterCutpointUrl = this._getFileNameOrNone(modelLogRelativeRiskAfterCutpointUrl);
modelReferenceDatasetBeforeCutpointUrl = this._getFileNameOrNone(modelReferenceDatasetBeforeCutpointUrl);
modelReferenceDatasetAfterCutpointUrl = this._getFileNameOrNone(modelReferenceDatasetAfterCutpointUrl);
modelReferenceDatasetWeightsVariableNameBeforeCutpoint = this._valueOrNone(modelReferenceDatasetWeightsVariableNameBeforeCutpoint);
modelReferenceDatasetWeightsVariableNameAfterCutpoint = this._valueOrNone(modelReferenceDatasetWeightsVariableNameAfterCutpoint);
modelSnpInfoUrl = this._getFileNameOrNone(modelSnpInfoUrl);
modelFamilyHistoryVariableNameBeforeCutpoint = this._valueOrNone(modelFamilyHistoryVariableNameBeforeCutpoint);
modelFamilyHistoryVariableNameAfterCutpoint = this._valueOrNone(modelFamilyHistoryVariableNameAfterCutpoint);
applyCovariateProfileBeforeCutpointUrl = this._getFileNameOrNone(applyCovariateProfileBeforeCutpointUrl);
applyCovariateProfileAfterCutpointUrl = this._getFileNameOrNone(applyCovariateProfileAfterCutpointUrl);
applySnpProfileUrl = this._getFileNameOrNone(applySnpProfileUrl);
cutpoint = this._valueOrNone(cutpoint);
numImputations = this._valueOrNone(numImputations);
returnLinearPredictors = returnLinearPredictors ? 'True' : 'False';
returnReferenceRisks = returnReferenceRisks ? 'True' : 'False';
seed = this._valueOrNone(seed);
let result = this.pyodide.runPython(`
result = icare.compute_absolute_risk_split_interval(
apply_age_start = ${applyAgeStart},
apply_age_interval_length = ${applyAgeIntervalLength},
model_disease_incidence_rates_path = ${modelDiseaseIncidenceRatesUrl},
model_competing_incidence_rates_path = ${modelCompetingIncidenceRatesUrl},
model_covariate_formula_before_cutpoint_path = ${modelCovariateFormulaBeforeCutpointUrl},
model_covariate_formula_after_cutpoint_path = ${modelCovariateFormulaAfterCutpointUrl},
model_log_relative_risk_before_cutpoint_path = ${modelLogRelativeRiskBeforeCutpointUrl},
model_log_relative_risk_after_cutpoint_path = ${modelLogRelativeRiskAfterCutpointUrl},
model_reference_dataset_before_cutpoint_path = ${modelReferenceDatasetBeforeCutpointUrl},
model_reference_dataset_after_cutpoint_path = ${modelReferenceDatasetAfterCutpointUrl},
model_reference_dataset_weights_variable_name_before_cutpoint = ${modelReferenceDatasetWeightsVariableNameBeforeCutpoint},
model_reference_dataset_weights_variable_name_after_cutpoint = ${modelReferenceDatasetWeightsVariableNameAfterCutpoint},
model_snp_info_path = ${modelSnpInfoUrl},
model_family_history_variable_name_before_cutpoint = ${modelFamilyHistoryVariableNameBeforeCutpoint},
model_family_history_variable_name_after_cutpoint = ${modelFamilyHistoryVariableNameAfterCutpoint},
apply_covariate_profile_before_cutpoint_path= ${applyCovariateProfileBeforeCutpointUrl},
apply_covariate_profile_after_cutpoint_path = ${applyCovariateProfileAfterCutpointUrl},
apply_snp_profile_path = ${applySnpProfileUrl},
cutpoint = ${cutpoint},
num_imputations = ${numImputations},
return_linear_predictors = ${returnLinearPredictors},
return_reference_risks = ${returnReferenceRisks},
seed = ${seed})
result
`).toJs();
if (result.isError) {
throw new Error(result.message);
}
result = this.convertOutputToJSON(result);
result['profile'] = JSON.parse(result['profile']);
return result;
}
/**
* This function is used to validate absolute risk models.
* @async
* @function
* @param studyDataUrl
* A URL to a CSV file containing the study data. The data must contain the following columns:
* 1) 'observed_outcome': the disease status { 0: censored; 1: disease occurred by the end of the follow-up
* period },
* 2) 'study_entry_age': age (in years) when entering the cohort,
* 3) 'study_exit_age': age (in years) at last follow-up visit,
* 4) 'time_of_onset': time (in years) from study entry to disease onset; note that all subjects are
* disease-free at the time of entry and those individuals who do not develop the disease by the end of the
* follow-up period are considered censored, and this value is set to 'inf'.
* 5) 'sampling_weights': for a case-control study nested within a cohort study, this column is provided to
* indicate the probability of the inclusion of that individual into the nested case-control study. If the
* study is not a nested case-control study, do not include this column in the study data.
* @param predictedRiskInterval
* If the risk validation is to be performed over the total follow-up period, set this parameter to the string
* 'total-followup'. Otherwise, it should be set to either an integer or an array of integers representing the
* number of years after study entry over which, the estimated risk is being validated. Example: 5 for a 5-year
* risk validation.
* @param icareModelParameters
* An object containing the parameters of the absolute risk model to be validated. The keys of the object
* are the parameters of the 'computeAbsoluteRisk' function. If the risk prediction being validated is from a
* method other than iCARE, this parameter should be set to null and the 'predictedRiskVariableName' and
* 'linearPredictorVariableName' parameters should be set to the names of the columns containing the risk
* predictions and linear predictor values, respectively, in the study data.
* @param predictedRiskVariableName
* If the risk prediction is to be done by iCARE (i.e. using the computeAbsoluteRisk() method), set this value
* to null. Else, supply the risk predictions for each individual in the study data, using some other method,
* as an additional column in the study data. The name of that column should be supplied here as a string.
* @param linearPredictorVariableName
* The linear predictor is a risk score for an individual calculated as: Z * beta. Here, Z is a vector of risk
* factor values for that individual and beta is a vector of log relative risks. If the linear predictor values are
* to be calculated by iCARE (i.e. using the computeAbsoluteRisk() method), set this value to null. Else, supply
* the linear predictor values for each individual in the study data as an additional column in the study data.
* The name of that column should be supplied here.
* @param referenceEntryAge
* Specify an integer or an array of integers, representing the ages at entry for the reference population, to
* compute their absolute risks. If both 'referencePredictedRisks' and 'referenceLinearPredictors' are provided,
* this parameter is ignored.
* @param referenceExitAge
* Specify an integer or an array of integers, representing the ages at exit for the reference population, to
* compute their absolute risks. If both 'referencePredictedRisks' and 'referenceLinearPredictors' are
* provided, this parameter is ignored.
* @param referencePredictedRisks
* An array of absolute risk estimates for the reference population assuming the entry ages specified at
* 'referenceEntryAge' and exit ages specified at 'referenceExitAge'. If both this parameter and
* 'referenceLinearPredictors' are provided, they are not re-computed using the computeAbsoluteRisk() method.
* @param referenceLinearPredictors
* An array of linear predictor values for the reference population assuming the entry ages specified at
* 'referenceEntryAge' and exit ages specified at 'referenceExitAge'. If both this parameter and
* 'referencePredictedRisks' are provided, they are not re-computed using the computeAbsoluteRisk() method.
* @param numberOfPercentiles
* The number of percentiles of the risk score that determines the number of strata over which, the risk prediction
* model is to be validated.
* @param linearPredictorCutoffs
* An array of user specified cut-points for the linear predictor to define categories for absolute risk
* calibration and relative risk calibration.
* @param datasetName
* Name of the validation dataset, e.g., "PLCO full cohort" or "Full cohort simulation".
* @param modelName
* Name of the absolute risk model being validated, e.g., "Synthetic model" or "Simulation setting".
* @param seed
* Fix a seed for reproducibility.
* @returns {Promise<{}|*>}
* An object with the following keys—
* 1) 'info':
* An object with the following keys:
* - 'risk_prediction_interval': A string describing the risk prediction interval e.g., "5 years". If
* the risk prediction is over the total follow-up period of the study, this reads
* "Observed follow-up". If each individual is assigned a different risk prediction interval, this
* reads "Varies across individuals".
* - 'dataset_name': The name of the validation dataset.
* - 'model_name': The name of the absolute risk model being validated.
* 2) 'study_data':
* A records-oriented JSON representation of the user-input study data. Additionally, the following columns
* are added to the study data:
* - 'predicted_risk_interval': The risk prediction interval for each individual in the study data
* based on the user-input parameter value for 'predictedRiskInterval'.
* - 'followup': The observed follow-up time for each individual in the study data after censoring and
* based on the user-input parameter value for 'predictedRiskInterval'.
* - 'risk_estimates': The estimated absolute risks for each individual in the study data based on the
* model specified by the user-input parameters. This column is only present when the
* 'predictedRiskVariableName' parameter is set to null.
* - 'linear_predictors': The estimated linear predictors for each individual in the study data based
* on the model specified by the user-input parameters. This column is only present when the
* 'linearPredictorVariableName' parameter is set to null.
* - 'linear_predictors_category': The category of the linear predictor for each individual in the
* study data based on the user-input parameter value for 'linearPredictorCutoffs', if provided, else
* based on 'numberOfPercentiles'.
* 3) 'reference':
* An object with two further keys: 'absolute_risk' and 'risk_score' containing the predicted absolute
* risks and linear predictors for the reference population, respectively. This key is only present when
* either both 'referenceEntryAge' and 'referenceExitAge' are provided to be calculated by iCARE, or
* pre-calculated 'referencePredictedRisks' and 'referenceLinearPredictors' are both directly provided by
* the user.
* 4) 'incidence_rates':
* The estimated age-specific incidence rates in the study and population as a data frame converted into
* the records-oriented JSON format. The columns of the data frame are "age" and "study_rate". When iCARE
* parameters are included (containing the disease incidence rates), "population_rate" is also included as
* a column.
* 5) 'auc':
* An object containing the area under the receiver operating characteristic curve (AUC), the variance,
* and the 95% confidence interval for the AUC. The object has the following keys: 'auc', 'variance',
* 'lower_ci', and 'upper_ci'.
* 6) 'expected_by_observed_ratio':
* An object containing the ratio of the expected and the observed number of cases in the study population,
* and the 95% confidence interval for the ratio. The dictionary has the following keys: 'ratio',
* 'lower_ci', and 'upper_ci'.
* 7) 'calibration':
* An object containing the calibration results. The dictionary has the following keys: 'absolute_risk',
* and 'relative_risk' containing the calibration results for absolute risk and relative risk,
* respectively. Each of these keys is a dictionary with the following information (associated key name):
* statistical testing method name ('method'), p-value ('p_value'), variance matrix ('variance'),
* test-statistic ('statistic'; with a sub-key containing 'chi_square' for the chi-squared metric), and
* parameters of the statistical test ('parameter'; with a sub-key 'degrees_of_freedom' for the degrees of
* freedom of the chi-squared distribution).
* 8) 'category_specific_calibration':
* A records-oriented JSON containing the category-specific calibration results. The columns of the data
* frame are: 'category', 'observed_absolute_risk', 'predicted_absolute_risk', 'lower_ci_absolute_risk',
* 'upper_ci_absolute_risk', 'observed_relative_risk', 'predicted_relative_risk', 'lower_ci_relative_risk',
* 'upper_ci_relative_risk'. The rows of the data frame are the categories of the risk score.
* 9) 'method':
* A string containing the name of the iCARE method being used. When this method is used, the method name
* is "iCARE - absolute risk model validation".
*/
async validateAbsoluteRiskModel(
{
studyDataUrl,
predictedRiskInterval,
icareModelParameters = {
applyAgeStart: undefined,
applyAgeIntervalLength: undefined,
modelDiseaseIncidenceRatesUrl: undefined,
modelCompetingIncidenceRatesUrl: undefined,
modelCovariateFormulaUrl: undefined,
modelLogRelativeRiskUrl: undefined,
modelReferenceDatasetUrl: undefined,
modelReferenceDatasetWeightsVariableName: undefined,
modelSnpInfoUrl: undefined,
modelFamilyHistoryVariableName: undefined,
numImputations: 5,
applyCovariateProfileUrl: undefined,
applySnpProfileUrl: undefined,
returnLinearPredictors: false,
returnReferenceRisks: false,
seed: 1234,
},
predictedRiskVariableName,
linearPredictorVariableName,
referenceEntryAge,
referenceExitAge,
referencePredictedRisks,
referenceLinearPredictors,
numberOfPercentiles = 10,
linearPredictorCutoffs,
datasetName = 'Example dataset',
modelName = 'Example risk prediction model',
seed = 1234,
}) {
if (!this.pyodide) {
throw new Error('Please instantiate this class using the WasmICARE.initialize() method.');
}
icareModelParameters = Object.assign({
applyAgeStart: undefined,
applyAgeIntervalLength: undefined,
modelDiseaseIncidenceRatesUrl: undefined,
modelCompetingIncidenceRatesUrl: undefined,
modelCovariateFormulaUrl: undefined,
modelLogRelativeRiskUrl: undefined,
modelReferenceDatasetUrl: undefined,
modelReferenceDatasetWeightsVariableName: undefined,
modelSnpInfoUrl: undefined,
modelFamilyHistoryVariableName: undefined,
numImputations: 5,
applyCovariateProfileUrl: undefined,
applySnpProfileUrl: undefined,
returnLinearPredictors: false,
returnReferenceRisks: false,
seed: 1234,
}, icareModelParameters);
if (icareModelParameters) {
const fileURLs = [
icareModelParameters.modelDiseaseIncidenceRatesUrl,
icareModelParameters.modelCompetingIncidenceRatesUrl,
icareModelParameters.modelCovariateFormulaUrl,
icareModelParameters.modelLogRelativeRiskUrl,
icareModelParameters.modelReferenceDatasetUrl,
icareModelParameters.modelSnpInfoUrl,
icareModelParameters.applyCovariateProfileUrl,
icareModelParameters.applySnpProfileUrl,
].filter(url => url !== undefined).filter(url => !this.preloadedFiles.includes(url));
await this.fetchFilesAndWriteToPyodideFS(fileURLs);
icareModelParameters.applyAgeStart = this._valueOrNone(icareModelParameters.applyAgeStart);
icareModelParameters.applyAgeIntervalLength = this._valueOrNone(icareModelParameters.applyAgeIntervalLength);
icareModelParameters.modelDiseaseIncidenceRatesUrl = this._getFileNameOrNone(icareModelParameters.modelDiseaseIncidenceRatesUrl);
icareModelParameters.modelCompetingIncidenceRatesUrl = this._getFileNameOrNone(icareModelParameters.modelCompetingIncidenceRatesUrl);
icareModelParameters.modelCovariateFormulaUrl = this._getFileNameOrNone(icareModelParameters.modelCovariateFormulaUrl);
icareModelParameters.modelLogRelativeRiskUrl = this._getFileNameOrNone(icareModelParameters.modelLogRelativeRiskUrl);
icareModelParameters.modelReferenceDatasetUrl = this._getFileNameOrNone(icareModelParameters.modelReferenceDatasetUrl);
icareModelParameters.modelReferenceDatasetWeightsVariableName = this._valueOrNone(icareModelParameters.modelReferenceDatasetWeightsVariableName);
icareModelParameters.modelSnpInfoUrl = this._getFileNameOrNone(icareModelParameters.modelSnpInfoUrl);
icareModelParameters.modelFamilyHistoryVariableName = this._valueOrNone(icareModelParameters.modelFamilyHistoryVariableName);
icareModelParameters.numImputations = this._valueOrNone(icareModelParameters.numImputations);
icareModelParameters.applyCovariateProfileUrl = this._getFileNameOrNone(icareModelParameters.applyCovariateProfileUrl);
icareModelParameters.applySnpProfileUrl = this._getFileNameOrNone(icareModelParameters.applySnpProfileUrl);
icareModelParameters.returnLinearPredictors = icareModelParameters.returnLinearPredictors ? 'True' : 'False';
icareModelParameters.returnReferenceRisks = icareModelParameters.returnReferenceRisks ? 'True' : 'False';
icareModelParameters.seed = this._valueOrNone(icareModelParameters.seed);
icareModelParameters = `{
'apply_age_start': ${icareModelParameters.applyAgeStart},
'apply_age_interval_length': ${icareModelParameters.applyAgeIntervalLength},
'model_disease_incidence_rates_path': ${icareModelParameters.modelDiseaseIncidenceRatesUrl},
'model_competing_incidence_rates_path': ${icareModelParameters.modelCompetingIncidenceRatesUrl},
'model_covariate_formula_path': ${icareModelParameters.modelCovariateFormulaUrl},
'model_log_relative_risk_path': ${icareModelParameters.modelLogRelativeRiskUrl},
'model_reference_dataset_path': ${icareModelParameters.modelReferenceDatasetUrl},
'model_reference_dataset_weights_variable_name': ${icareModelParameters.modelReferenceDatasetWeightsVariableName},
'model_snp_info_path': ${icareModelParameters.modelSnpInfoUrl},
'model_family_history_variable_name': ${icareModelParameters.modelFamilyHistoryVariableName},
'num_imputations': ${icareModelParameters.numImputations},
'apply_covariate_profile_path': ${icareModelParameters.applyCovariateProfileUrl},
'apply_snp_profile_path': ${icareModelParameters.applySnpProfileUrl},
'return_linear_predictors': ${icareModelParameters.returnLinearPredictors},
'return_reference_risks': ${icareModelParameters.returnReferenceRisks},
'seed': ${icareModelParameters.seed}}`;
} else {
icareModelParameters = 'None'
}
const fileURLs = [studyDataUrl].filter(url => url !== undefined);
await this.fetchFilesAndWriteToPyodideFS(fileURLs);
studyDataUrl = this._getFileNameOrNone(studyDataUrl);
predictedRiskInterval = this._valueOrNone(predictedRiskInterval);
predictedRiskVariableName = this._valueOrNone(predictedRiskVariableName);
linearPredictorVariableName = this._valueOrNone(linearPredictorVariableName);
referenceEntryAge = this._valueOrNone(referenceEntryAge);
referenceExitAge = this._valueOrNone(referenceExitAge);
referencePredictedRisks = this._valueOrNone(referencePredictedRisks);
referenceLinearPredictors = this._valueOrNone(referenceLinearPredictors);
numberOfPercentiles = this._valueOrNone(numberOfPercentiles);
linearPredictorCutoffs = this._valueOrNone(linearPredictorCutoffs);
datasetName = this._valueOrNone(datasetName);
modelName = this._valueOrNone(modelName);
seed = this._valueOrNone(seed);
let result = this.pyodide.runPython(`
result = icare.validate_absolute_risk_model(
study_data_path = ${studyDataUrl},
predicted_risk_interval = ${predictedRiskInterval},
icare_model_parameters = ${icareModelParameters},
predicted_risk_variable_name = ${predictedRiskVariableName},
linear_predictor_variable_name = ${linearPredictorVariableName},
reference_entry_age = ${referenceEntryAge},
reference_exit_age = ${referenceExitAge},
reference_predicted_risks = ${referencePredictedRisks},
reference_linear_predictors = ${referenceLinearPredictors},
number_of_percentiles = ${numberOfPercentiles},
linear_predictor_cutoffs = ${linearPredictorCutoffs},
dataset_name = ${datasetName},
model_name = ${modelName},
seed = ${seed})
result
`).toJs();
if (result.isError) {
throw new Error(result.message);
}
result = this.convertOutputToJSON(result);
result['study_data'] = JSON.parse(result['study_data']);
result['incidence_rates'] = JSON.parse(result['incidence_rates']);
result['category_specific_calibration'] = JSON.parse(result['category_specific_calibration']);
return result;
}
}
/**
 * Entry point for obtaining Wasm-iCARE. Awaits WasmICARE.initialize() and resolves with the
 * instance it produces, which has all the functionalities of Py-iCARE.
 * @async
 * @function
 * @returns {Promise<WasmICARE>}
 */
async function loadWasmICARE() {
    const wasmIcareInstance = await WasmICARE.initialize();
    return wasmIcareInstance;
}
// Make the loader function available to importers as a named export.
export {
loadWasmICARE
};