-
Notifications
You must be signed in to change notification settings - Fork 4
/
AnnotatedExample.java
132 lines (108 loc) · 5.38 KB
/
AnnotatedExample.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package javastraw;
import javastraw.reader.Dataset;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.block.Block;
import javastraw.reader.block.ContactRecord;
import javastraw.reader.mzd.Matrix;
import javastraw.reader.mzd.MatrixZoomData;
import javastraw.reader.norm.NormalizationPicker;
import javastraw.reader.type.HiCZoom;
import javastraw.reader.type.NormalizationType;
import javastraw.tools.HiCFileTools;
import java.util.Iterator;
import java.util.List;
/**
 * Annotated walkthrough of reading contact data from a .hic file.
 *
 * <p>The example opens a dataset, picks the first available normalization from a
 * preference list, and then demonstrates:
 * <ol>
 *   <li>iterating over every contact record of each intra-chromosomal matrix,</li>
 *   <li>fetching only the records of a rectangular sub-region, and</li>
 *   <li>visiting every chromosome pair (intra- and inter-chromosomal).</li>
 * </ol>
 */
public class AnnotatedExample {

    /**
     * Runs the example against the hard-coded file {@code "file.hic"}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // do you want to cache portions of the file?
        // this uses more RAM, but if you want to repeatedly
        // query nearby regions, it can improve speed by a lot
        boolean useCache = false;
        String filename = "file.hic";

        // create a hic dataset object
        Dataset ds = HiCFileTools.extractDatasetForCLT(filename, false, useCache, false);

        // pick the normalization we would like
        // this line will check multiple possible norms
        // and pick whichever is available (in order of preference)
        NormalizationType norm = NormalizationPicker.getFirstValidNormInThisOrder(ds, new String[]{"KR", "SCALE", "VC", "VC_SQRT", "NONE"});
        System.out.println("Norm being used: " + norm.getLabel());

        // let's set our resolution
        int resolution = 5000;

        // let's grab the chromosomes
        Chromosome[] chromosomes = ds.getChromosomeHandler().getChromosomeArrayWithoutAllByAll();

        // now let's iterate on every chromosome (only intra-chromosomal regions for now)
        for (Chromosome chromosome : chromosomes) {
            Matrix matrix = ds.getMatrix(chromosome, chromosome);
            if (matrix == null) continue;
            MatrixZoomData zd = matrix.getZoomData(new HiCZoom(resolution));
            if (zd == null) continue;

            // zd is now a data structure that contains pointers to the data
            // *** Let's show 2 different ways to access data ***

            // OPTION 1
            // iterate on all the data for the whole chromosome in sparse format
            Iterator<ContactRecord> iterator = zd.getNormalizedIterator(norm);
            while (iterator.hasNext()) {
                ContactRecord record = iterator.next();
                // now do whatever you want with the contact record
                int binX = record.getBinX();
                int binY = record.getBinY();
                float counts = record.getCounts();

                // binX and binY are in BIN coordinates, not genome coordinates
                // to switch, we can just multiply by the resolution
                // (widen to long first so the product cannot overflow int
                // for very large bin indices)
                long genomeX = (long) binX * resolution;
                long genomeY = (long) binY * resolution;

                if (counts > 0) { // will skip NaNs (NaN > 0 is false); also skips zeros
                    // do task
                    System.out.println(genomeX + " " + genomeY + " " + counts);

                    // the iterator only iterates above the diagonal
                    // to also fill in data below the diagonal, flip it:
                    // same record, with the X and Y coordinates swapped
                    if (binX != binY) {
                        System.out.println(genomeY + " " + genomeX + " " + counts);
                        // do task
                    }
                }
            }

            // OPTION 2
            // just grab sparse data for a specific region

            // choose your setting for when the diagonal is in the region
            boolean getDataUnderTheDiagonal = true;

            // our bounds will be binXStart, binYStart, binXEnd, binYEnd
            // these are in BIN coordinates, not genome coordinates
            int binXStart = 500, binYStart = 600, binXEnd = 1000, binYEnd = 1200;
            List<Block> blocks = zd.getNormalizedBlocksOverlapping(binXStart, binYStart, binXEnd, binYEnd, norm, getDataUnderTheDiagonal);
            for (Block b : blocks) {
                if (b != null) {
                    for (ContactRecord rec : b.getContactRecords()) {
                        if (rec.getCounts() > 0) { // will skip NaNs
                            // the locals below are intentionally unused:
                            // they only illustrate what is available per record

                            // can choose to use the BIN coordinates
                            int binX = rec.getBinX();
                            int binY = rec.getBinY();

                            // you could choose to use relative coordinates for the box given
                            int relativeX = rec.getBinX() - binXStart;
                            int relativeY = rec.getBinY() - binYStart;

                            float counts = rec.getCounts();
                        }
                    }
                }
            }
        }

        // to iterate over the whole genome
        // j starts at i so that each unordered chromosome pair is visited once
        for (int i = 0; i < chromosomes.length; i++) {
            for (int j = i; j < chromosomes.length; j++) {
                Matrix matrix = ds.getMatrix(chromosomes[i], chromosomes[j]);
                if (matrix == null) continue;
                MatrixZoomData zd = matrix.getZoomData(new HiCZoom(resolution));
                if (zd == null) continue;

                if (i == j) {
                    // intra-chromosomal region
                } else {
                    // inter-chromosomal region
                }
            }
        }
    }
}