Skip to content

Commit

Permalink
critical bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sa501428 committed Jun 30, 2022
1 parent 7350ffa commit 841adb1
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 50 deletions.
2 changes: 1 addition & 1 deletion src/hic/HiCGlobals.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
* @since 11/25/14
*/
public class HiCGlobals {
public static final String versionNum = "3.19.00";
public static final String versionNum = "3.20.00";
public static final int writingVersion = 9;
public static final int bufferSize = 2097152;
public static int MAX_PEARSON_ZOOM = 50000;
Expand Down
22 changes: 20 additions & 2 deletions src/hic/tools/utils/norm/CustomNormVectorFileHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
import javastraw.reader.DatasetReaderV2;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.basics.ChromosomeHandler;
import javastraw.reader.block.ContactRecord;
import javastraw.reader.datastructures.ListOfDoubleArrays;
import javastraw.reader.datastructures.ListOfFloatArrays;
import javastraw.reader.expected.ExpectedValueFunction;
import javastraw.reader.mzd.MatrixZoomData;
import javastraw.reader.norm.NormalizationVector;
Expand Down Expand Up @@ -147,8 +149,24 @@ private static void handleLoadedVector(NormalizationType customNormType, final i
int sizeInBytes = (int) (newPos - position);
normVectorIndex.add(new NormalizationVectorIndexEntry(
customNormType.toString(), chrIndx, zoom.getUnit().toString(), zoom.getBinSize(), position, sizeInBytes));

evLoaded.addDistancesFromIterator(chrIndx, zd.getDirectIterator(), vector.getData().convertToFloats());

addDistancesFromIterator(chrIndx, zd.getDirectIterator(), vector.getData().convertToFloats(), evLoaded);
}
}

/**
 * Feeds normalized contact counts from an iterator into an expected-value calculation.
 * <p>
 * For each contact record, the raw count is divided by the product of the normalization
 * vector entries at the record's two bins, and the result is handed to
 * {@code ev.addDistance(chrIndx, binX, binY, value)}. A record is skipped unless both
 * vector entries are strictly greater than zero; a NaN entry fails that comparison and
 * is therefore skipped as well.
 *
 * @param chrIndx  chromosome index forwarded unchanged to {@code ev.addDistance}
 * @param iterator source of contact records; it is consumed until {@code hasNext()} is false
 * @param vector   normalization vector, looked up by each record's bin X and bin Y
 * @param ev       calculation that receives every normalized value
 */
private static void addDistancesFromIterator(int chrIndx, Iterator<ContactRecord> iterator,
                                             ListOfFloatArrays vector, ExpectedValueCalculation ev) {
    while (iterator.hasNext()) {
        ContactRecord cr = iterator.next();
        int x = cr.getBinX();
        int y = cr.getBinY();
        final float counts = cr.getCounts();
        float xVal = vector.get(x);
        float yVal = vector.get(y);
        // Conditional-and (&&) is the idiomatic boolean operator here; both operands are
        // side-effect-free comparisons, so the outcome matches the previous bitwise '&'.
        if (xVal > 0 && yVal > 0) {
            // The division is evaluated in float precision and only then widened to double.
            double value = counts / (xVal * yVal);
            ev.addDistance(chrIndx, x, y, value);
        }
    }
}

Expand Down
23 changes: 12 additions & 11 deletions src/hic/tools/utils/norm/NormalizationVectorUpdater.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ protected static void printNormTiming(String norm, Chromosome chr, HiCZoom zoom,

protected static void updateExpectedValueCalculationForChr(final int chrIdx, NormalizationCalculations nc,
ListOfFloatArrays vec, NormalizationType type,
HiCZoom zoom, MatrixZoomData zd,
HiCZoom zoom, BigContactArray ba,
ExpectedValueCalculation ev,
BigListOfByteWriters normVectorBuffers,
List<NormalizationVectorIndexEntry> normVectorIndex) throws IOException {
double factor = nc.getSumFactor(vec);
vec.multiplyEverythingBy(factor);
updateNormVectorIndexWithVector(normVectorIndex, normVectorBuffers, vec, chrIdx, type, zoom);
ev.addDistancesFromIterator(chrIdx, zd.getDirectIterator(), vec);
ba.updateGenomeWideExpected(chrIdx, vec, ev);
}

protected void reEvaluateWhichIntraNormsToBuild(List<NormalizationType> normalizationsToBuild) {
Expand Down Expand Up @@ -137,7 +137,7 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB
// Loop through chromosomes
for (Chromosome chrom : chromosomeHandler.getChromosomeArrayWithoutAllByAll()) {

Matrix matrix = ds.getMatrix(chrom, chrom, zoom.getBinSize());
Matrix matrix = ds.getMatrix(chrom, chrom);
if (matrix == null) continue;
MatrixZoomData zd = matrix.getZoomData(zoom);
if (zd == null) continue;
Expand All @@ -147,22 +147,22 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB
}

BigContactArray ba = BigContactArrayCreator.createFromZD(zd);
matrix.clearCacheForZoom(zoom);

GWNorms.addGWNormsToBuffer(gwNormalizations, gwNormMaps, chrom, normVectorIndices,
normVectorBuffers, zoom, gwMapExpected, ba);
GWNorms.addGWNormsToBuffer(interNormalizations, gwNormMaps, chrom, normVectorIndices,
normVectorBuffers, zoom, gwMapExpected, ba);

NormalizationCalculations nc = new NormalizationCalculations(ba, zd.getBinSize());
matrix.clearCacheForZoom(zoom);
NormalizationCalculations nc = new NormalizationCalculations(ba, zoom.getBinSize());

if (weShouldBuildVC || weShouldBuildVCSqrt || weShouldBuildScale) {
boolean saveVC = weShouldBuildVC && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.VC);
boolean saveVCSqrt = weShouldBuildVCSqrt && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.VC_SQRT);
boolean saveScale = weShouldBuildScale && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.SCALE);

if (saveVC || saveVCSqrt || saveScale) {
buildTheNorms(saveVC, saveVCSqrt, saveScale, chrom, nc, zoom, zd, evVC, evVCSqrt, evSCALE);
buildTheNorms(saveVC, saveVCSqrt, saveScale, chrom, nc, zoom, evVC, evVCSqrt, evSCALE, ba);
}
}

Expand Down Expand Up @@ -191,8 +191,9 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB
}

private void buildTheNorms(boolean saveVC, boolean saveVCSqrt, boolean saveScale, Chromosome chr,
NormalizationCalculations nc, HiCZoom zoom, MatrixZoomData zd,
ExpectedValueCalculation evVC, ExpectedValueCalculation evVCSqrt, ExpectedValueCalculation evSCALE) throws IOException {
NormalizationCalculations nc, HiCZoom zoom, ExpectedValueCalculation evVC,
ExpectedValueCalculation evVCSqrt, ExpectedValueCalculation evSCALE,
BigContactArray ba) throws IOException {

final int chrIdx = chr.getIndex();
ListOfFloatArrays vc = nc.computeVC();
Expand All @@ -204,19 +205,19 @@ private void buildTheNorms(boolean saveVC, boolean saveVCSqrt, boolean saveScale
if (scale == null) {
scaleBPFailChroms.add(chr);
} else {
updateExpectedValueCalculationForChr(chrIdx, nc, scale, NormalizationHandler.SCALE, zoom, zd, evSCALE, normVectorBuffers, normVectorIndices);
updateExpectedValueCalculationForChr(chrIdx, nc, scale, NormalizationHandler.SCALE, zoom, ba, evSCALE, normVectorBuffers, normVectorIndices);
}
}
}
if (saveVC) {
updateExpectedValueCalculationForChr(chrIdx, nc, vc, NormalizationHandler.VC, zoom, zd, evVC, normVectorBuffers, normVectorIndices);
updateExpectedValueCalculationForChr(chrIdx, nc, vc, NormalizationHandler.VC, zoom, ba, evVC, normVectorBuffers, normVectorIndices);
}
if (saveVCSqrt) {
ListOfFloatArrays vcSqrt = new ListOfFloatArrays(vc.getLength());
for (int i = 0; i < vc.getLength(); i++) {
vcSqrt.set(i, (float) Math.sqrt(vc.get(i)));
}
updateExpectedValueCalculationForChr(chrIdx, nc, vcSqrt, NormalizationHandler.VC_SQRT, zoom, zd, evVCSqrt, normVectorBuffers, normVectorIndices);
updateExpectedValueCalculationForChr(chrIdx, nc, vcSqrt, NormalizationHandler.VC_SQRT, zoom, ba, evVCSqrt, normVectorBuffers, normVectorIndices);
}
}
}
48 changes: 12 additions & 36 deletions src/hic/tools/utils/original/ExpectedValueCalculation.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2011-2022 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
* Copyright (c) 2020-2022 Rice University, Baylor College of Medicine, Aiden Lab
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -27,15 +27,12 @@

import javastraw.reader.basics.Chromosome;
import javastraw.reader.basics.ChromosomeHandler;
import javastraw.reader.block.ContactRecord;
import javastraw.reader.datastructures.ListOfDoubleArrays;
import javastraw.reader.datastructures.ListOfFloatArrays;
import javastraw.reader.expected.ExpectedValueFunctionImpl;
import javastraw.reader.type.HiCZoom;
import javastraw.reader.type.NormalizationType;

import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

Expand Down Expand Up @@ -138,23 +135,18 @@ public int getGridSize() {
* @param bin2 Position2 observed in units of "bins"
*/
public synchronized void addDistance(Integer chrIdx, int bin1, int bin2, double weight) {

// Ignore NaN values TODO -- is this the right thing to do?
if (Double.isNaN(weight)) return;

int dist;
Chromosome chr = chromosomesMap.get(chrIdx);
if (chr == null) return;

if (chromosomeCounts.containsKey(chrIdx)) {
double count = chromosomeCounts.get(chrIdx);
chromosomeCounts.put(chrIdx, count + weight);
} else {
chromosomeCounts.put(chrIdx, weight);
if (weight > 0) {
Chromosome chr = chromosomesMap.get(chrIdx);
if (chr == null) return;
if (chromosomeCounts.containsKey(chrIdx)) {
double count = chromosomeCounts.get(chrIdx);
chromosomeCounts.put(chrIdx, count + weight);
} else {
chromosomeCounts.put(chrIdx, weight);
}
int dist = Math.abs(bin1 - bin2);
actualDistances[dist] += weight;
}
dist = Math.abs(bin1 - bin2);

actualDistances[dist] += weight; // Math.log(1 + weight);
}

public void merge(ExpectedValueCalculation otherEVCalc) {
Expand Down Expand Up @@ -319,22 +311,6 @@ public ExpectedValueFunctionImpl getExpectedValueFunction() {
computeDensity();
return new ExpectedValueFunctionImpl(type, HiCZoom.HiCUnit.BP, gridSize, densityAvg, chrScaleFactors);
}

// TODO: this is often inefficient, we have all of the contact records when we leave norm calculations, should do this there if possible
/**
 * Adds the normalized count of every contact record from the iterator to this calculation.
 * <p>
 * Each record's count is divided by the product of the normalization vector entries at its
 * two bins and passed to {@link #addDistance}. Records are skipped unless both vector
 * entries are strictly greater than zero (a NaN entry fails that comparison).
 *
 * @param chrIndx  chromosome index forwarded to {@code addDistance}
 * @param iterator source of contact records; consumed until {@code hasNext()} is false
 * @param vector   normalization vector, looked up by each record's bin X and bin Y
 */
public void addDistancesFromIterator(int chrIndx, Iterator<ContactRecord> iterator, ListOfFloatArrays vector) {
while (iterator.hasNext()) {
ContactRecord cr = iterator.next();
int x = cr.getBinX();
int y = cr.getBinY();
final float counts = cr.getCounts();
float xVal = vector.get(x);
float yVal = vector.get(y);
// Non-short-circuit '&': both comparisons are always evaluated; neither has side effects.
if (xVal > 0 & yVal > 0) {
// The division is evaluated in float precision and only then widened to double.
double value = counts / (xVal * yVal);
addDistance(chrIndx, x, y, value);
}
}
}
}


Expand Down

0 comments on commit 841adb1

Please sign in to comment.