Skip to content

Commit

Permalink
Merge pull request #114 from EricSDavis/master
Browse files Browse the repository at this point in the history
Add function for reading available normalizations from .hic file.
  • Loading branch information
sa501428 authored Aug 31, 2022
2 parents bc91874 + b57a3a6 commit 2525edc
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ dist
env
hic_straw.egg-info

.Rproj.user
2 changes: 1 addition & 1 deletion R/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ Encoding: UTF-8
Imports: Rcpp
LinkingTo: Rcpp
NeedsCompilation: yes
RoxygenNote: 7.1.1
RoxygenNote: 7.2.0
1 change: 1 addition & 0 deletions R/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(readHicBpResolutions)
export(readHicChroms)
export(readHicNormTypes)
export(straw)
import(Rcpp)
useDynLib(strawr)
23 changes: 17 additions & 6 deletions R/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ straw <- function(norm, fname, chr1loc, chr2loc, unit, binsize, matrix = "observ
.Call('_strawr_straw', PACKAGE = 'strawr', norm, fname, chr1loc, chr2loc, unit, binsize, matrix)
}

#' Function for reading chromosomes from .hic file
#'
#' @param fname path to .hic file
#' @return Data frame of chromosome indices, names and lengths
#' @examples
#' readHicChroms(system.file("extdata", "test.hic", package = "strawr"))
#' @export
readHicChroms <- function(fname) {
# Thin wrapper: forwards `fname` to the compiled routine '_strawr_readHicChroms'.
.Call('_strawr_readHicChroms', PACKAGE = 'strawr', fname)
}

#' Function for reading basepair resolutions from .hic file
#'
#' @param fname path to .hic file
Expand All @@ -41,14 +52,14 @@ readHicBpResolutions <- function(fname) {
.Call('_strawr_readHicBpResolutions', PACKAGE = 'strawr', fname)
}

#' Function for reading available normalizations from .hic file
#'
#' @param fname path to .hic file
#' @return Vector of available normalizations
#' @examples
#' readHicNormTypes(system.file("extdata", "test.hic", package = "strawr"))
#' @export
readHicNormTypes <- function(fname) {
    # Thin wrapper: forwards `fname` to the compiled routine '_strawr_readHicNormTypes'.
    .Call('_strawr_readHicNormTypes', PACKAGE = 'strawr', fname)
}

20 changes: 20 additions & 0 deletions R/man/readHicNormTypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 22 additions & 5 deletions R/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

using namespace Rcpp;

// When RCPP_USE_GLOBAL_ROSTREAM is defined, this translation unit supplies the
// definitions of the Rcpp::Rcout / Rcpp::Rcerr stream references, binding them
// to the objects returned by Rcpp_cout_get() / Rcpp_cerr_get().
#ifdef RCPP_USE_GLOBAL_ROSTREAM
Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// straw
Rcpp::DataFrame straw(std::string norm, std::string fname, std::string chr1loc, std::string chr2loc, const std::string& unit, int32_t binsize, std::string matrix);
RcppExport SEXP _strawr_straw(SEXP normSEXP, SEXP fnameSEXP, SEXP chr1locSEXP, SEXP chr2locSEXP, SEXP unitSEXP, SEXP binsizeSEXP, SEXP matrixSEXP) {
Expand All @@ -22,6 +27,17 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// readHicChroms
// Forward declaration of the C++ implementation (defined in straw.cpp).
Rcpp::DataFrame readHicChroms(std::string fname);
// .Call entry point for readHicChroms: converts the incoming SEXP to a
// std::string, invokes the implementation and returns the wrapped result.
RcppExport SEXP _strawr_readHicChroms(SEXP fnameSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the body (presumably they
// translate C++ exceptions into R errors -- confirm against the Rcpp docs).
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// NOTE(review): declared after the result holder so it is destroyed first;
// presumably synchronises R's RNG state around the call -- confirm.
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert the R argument into the C++ parameter type.
Rcpp::traits::input_parameter< std::string >::type fname(fnameSEXP);
rcpp_result_gen = Rcpp::wrap(readHicChroms(fname));
return rcpp_result_gen;
END_RCPP
}
// readHicBpResolutions
Rcpp::NumericVector readHicBpResolutions(std::string fname);
RcppExport SEXP _strawr_readHicBpResolutions(SEXP fnameSEXP) {
Expand All @@ -33,22 +49,23 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// readHicNormTypes
// Forward declaration of the C++ implementation (defined in straw.cpp).
Rcpp::CharacterVector readHicNormTypes(std::string fname);
// .Call entry point for readHicNormTypes: converts the incoming SEXP to a
// std::string, invokes the implementation and returns the wrapped result.
RcppExport SEXP _strawr_readHicNormTypes(SEXP fnameSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the body (presumably they
// translate C++ exceptions into R errors -- confirm against the Rcpp docs).
BEGIN_RCPP
    // Holder for the value handed back to R.
    Rcpp::RObject rcpp_result_gen;
    // Declared after the result holder so it is destroyed first.
    Rcpp::RNGScope rcpp_rngScope_gen;
    // Convert the R argument into the C++ parameter type.
    Rcpp::traits::input_parameter< std::string >::type fname(fnameSEXP);
    rcpp_result_gen = Rcpp::wrap(readHicNormTypes(fname));
    return rcpp_result_gen;
END_RCPP
}

// Table of native routines that the R wrappers invoke through .Call().
// Each row is {registered name, function pointer, number of arguments};
// the all-NULL row terminates the table. Every routine appears exactly once.
static const R_CallMethodDef CallEntries[] = {
    {"_strawr_straw", (DL_FUNC) &_strawr_straw, 7},
    {"_strawr_readHicChroms", (DL_FUNC) &_strawr_readHicChroms, 1},
    {"_strawr_readHicBpResolutions", (DL_FUNC) &_strawr_readHicBpResolutions, 1},
    {"_strawr_readHicNormTypes", (DL_FUNC) &_strawr_readHicNormTypes, 1},
    {NULL, NULL, 0}
};

Expand Down
126 changes: 124 additions & 2 deletions R/src/straw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,21 +1291,23 @@ vector<chromosome> getChromosomes(string fname){
//' Function for reading chromosomes from .hic file
//'
//' @param fname path to .hic file
//' @return Data frame of chromosome indices, names and lengths
//' @examples
//' readHicChroms(system.file("extdata", "test.hic", package = "strawr"))
//' @export
// [[Rcpp::export]]
Rcpp::DataFrame readHicChroms(std::string fname)
{
    // Load every chromosome record from the file, then split the records into
    // one column per attribute so they can be assembled into a data frame.
    const std::vector<chromosome> chroms = getChromosomes(std::move(fname));

    Rcpp::NumericVector indices;
    Rcpp::StringVector names;
    Rcpp::NumericVector lengths;
    for (const chromosome &chrom : chroms) {
        indices.push_back(chrom.index);
        names.push_back(chrom.name);
        lengths.push_back(chrom.length);
    }

    // Single return: the data frame always carries the "index" column
    // alongside "name" and "length".
    return Rcpp::DataFrame::create(Rcpp::Named("index") = indices,
                                   Rcpp::Named("name") = names,
                                   Rcpp::Named("length") = lengths);
}

//' Function for reading basepair resolutions from .hic file
Expand All @@ -1326,3 +1328,123 @@ Rcpp::NumericVector readHicBpResolutions(std::string fname)
hiCFile->close();
return bpResolutions;
}

// Reads all normalizations from the footer
Rcpp::CharacterVector readNormsFromFooter(istream &fin, int64_t master, int32_t version) {

// Initialize variable to store norm types
Rcpp::CharacterVector normTypes;

// Read through the footer section
//nBytes
if (version > 8) {
readInt64FromFile(fin);
} else {
readInt32FromFile(fin);
}

// nEntries
int32_t nEntries = readInt32FromFile(fin);
for (int i = 0; i < nEntries; i++) {
string str;
getline(fin, str, '\0');
readInt64FromFile(fin); //fpos
readInt32FromFile(fin); //sizeInBytes
}

// nExpectedValues
int32_t nExpectedValues = readInt32FromFile(fin);
for (int i = 0; i < nExpectedValues; i++) {
string unit0;
getline(fin, unit0, '\0'); //unit
readInt32FromFile(fin);

int64_t nValues;
if (version > 8) {
nValues = readInt64FromFile(fin);
for (int j = 0; j < nValues; j++) {
readFloatFromFile(fin);
}
} else {
nValues = (int64_t) readInt32FromFile(fin);
for (int j = 0; j < nValues; j++) {
readDoubleFromFile(fin);
}
}

int32_t nNormalizationFactors = readInt32FromFile(fin);
for (int j = 0; j < nNormalizationFactors; j++) {
readInt32FromFile(fin); //chrIdx
if (version > 8) {
readFloatFromFile(fin); //v
} else {
readDoubleFromFile(fin);//v
}
}
}

// Needs to be read like this (readInt32FromFile doesn't work)
fin.read((char*)&nExpectedValues, sizeof(int32_t));
for (int i = 0; i < nExpectedValues; i++) {
//Record available norm types (handling empty strings as NONE)
string type;
getline(fin, type, '\0'); //typeString
if (type == "") {
type = "NONE";
}
normTypes.push_back(type);

string unit0;
getline(fin, unit0, '\0'); //unit
readInt32FromFile(fin);

int64_t nValues;
if (version > 8) {
nValues = readInt64FromFile(fin);
for (int j = 0; j < nValues; j++) {
readFloatFromFile(fin); //v
}
} else {
nValues = (int64_t) readInt32FromFile(fin);
for (int j = 0; j < nValues; j++) {
readDoubleFromFile(fin); //v
}
}

int32_t nNormalizationFactors = readInt32FromFile(fin);
for (int j = 0; j < nNormalizationFactors; j++) {
readInt32FromFile(fin); //chrIdx
if (version > 8) {
readFloatFromFile(fin); //v
} else {
readDoubleFromFile(fin); //v
}
}
}

// Include "NONE"
normTypes.push_back("NONE");

// Return unique norms
return unique(normTypes);
}

//' Function for reading available normalizations from .hic file
//'
//' @param fname path to .hic file
//' @return Vector of available normalizations
//' @examples
//' readHicNormTypes(system.file("extdata", "test.hic", package = "strawr"))
//' @export
// [[Rcpp::export]]
Rcpp::CharacterVector readHicNormTypes(std::string fname)
{
    // Automatic storage instead of a bare `new`: the HiCFile is released on
    // every exit path, including when reading the footer throws.
    HiCFile hiCFile(std::move(fname));

    // The footer starts at the offset stored in `master`; readNormsFromFooter
    // expects the stream to be positioned there already.
    hiCFile.fin.seekg(hiCFile.master, ios::beg);
    Rcpp::CharacterVector normTypes = readNormsFromFooter(hiCFile.fin,
                                                          hiCFile.master,
                                                          hiCFile.version);
    hiCFile.close();
    return normTypes;
}

0 comments on commit 2525edc

Please sign in to comment.