Skip to content

Commit

Permalink
Merge pull request #3 from 4dn-dcic/dev
Browse files Browse the repository at this point in the history
code clean up
  • Loading branch information
SooLee authored Feb 15, 2017
2 parents 42330fe + 02d73c6 commit d77acc4
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 22 deletions.
40 changes: 19 additions & 21 deletions pairsqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def __init__(self, chromsize_file):
self.total_len=0
with open(chromsize_file,'r') as f:
for line in f:
chr, size = line.strip().split('\t')
self.chrsize[chr] = int(size)
chrom, size = line.strip().split('\t')
self.chrsize[chrom] = int(size)
self.total_len += int(size)
self.nChr = len(self.chrsize)

Expand Down Expand Up @@ -102,9 +102,9 @@ def __init__(self, orientation_list, gs, pseudocount=1E-100):
def increment(self, orientation, chr):
    """Count one read under both its orientation and its chromosome.

    Both counters are updated together, or not at all, so that a read on an
    unlisted chromosome is not counted for orientation and vice versa.

    orientation: key looked up in self.orientation_list / self.count_per_ori.
    chr: chromosome name looked up in self.chr_list / self.count_per_chr.
    """
    # The parameter keeps its original name for existing callers; the body
    # uses `chrom`, which must be bound here (it was previously undefined
    # and raised NameError).
    chrom = chr
    if orientation not in self.orientation_list:  # skip if not included in orientation list
        return
    if chrom not in self.chr_list:  # skip if not included in chr list
        return
    self.count_per_ori[orientation] += 1
    self.count_per_chr[chrom] += 1

def calculate_sumcount(self):
self.sumcount = sum(self.count_per_ori.values())
Expand All @@ -129,21 +129,21 @@ def calculate_contact_probability_per_chr(self, s, bin_size):
"""Calculate contact probability for a given separation distance and bin size
s is the representative log10 separation distance.
"""
for chr in self.chr_list:
self.allpossible_count_per_chr[chr] = self.gs.chrsize[chr] - 10**s - 1
if self.allpossible_count_per_chr[chr] <= 0: # the chromosome is smaller than s
self.allpossible_count_per_chr[chr] = 0
self.prob_per_chr[chr] = 0
for chrom in self.chr_list:
self.allpossible_count_per_chr[chrom] = self.gs.chrsize[chrom] - 10**s - 1
if self.allpossible_count_per_chr[chrom] <= 0: # the chromosome is smaller than s
self.allpossible_count_per_chr[chrom] = 0
self.prob_per_chr[chrom] = 0
else:
self.prob_per_chr[chr] = self.count_per_chr[chr] / self.allpossible_count_per_chr[chr] / bin_size
self.log10prob_per_chr[chr] = math.log10(self.prob_per_chr[chr] + self.pseudocount)
self.prob_per_chr[chrom] = self.count_per_chr[chrom] / self.allpossible_count_per_chr[chrom] / bin_size
self.log10prob_per_chr[chrom] = math.log10(self.prob_per_chr[chrom] + self.pseudocount)

def calculate_contact_probability(self, s, bin_size):
    """Calculate the overall contact probability for a given separation distance and bin size.

    s is the representative log10 separation distance. It is not read
    directly here; the per-chromosome values already stored in
    self.allpossible_count_per_chr are summed instead, and self.sumcount
    supplies the observed count.

    Sets self.allpossible_sumcount, self.prob and self.log10prob.
    """
    self.allpossible_sumcount = sum(self.allpossible_count_per_chr.values())
    if self.allpossible_sumcount > 0:
        self.prob = self.sumcount / self.allpossible_sumcount / bin_size
    else:
        # Every chromosome is smaller than the separation distance, so no
        # pair is possible; use 0 (as the per-chromosome calculation does)
        # instead of dividing by zero.
        self.prob = 0
    # The pseudocount keeps log10 defined when the probability is 0.
    self.log10prob = math.log10(self.prob + self.pseudocount)

def print_content(self, fout, bin_mid, bin_range_string):
Expand Down Expand Up @@ -207,9 +207,9 @@ def get_bin_mid(self, bin_number):
return(bin_number * self.log_binsize + self.log_binsize/2)

def get_bin_number(self, distance):
    """Return the index of the log10-scale bin that holds `distance`.

    The log10 of the distance is divided by self.log_binsize and the
    quotient is truncated with int().
    """
    return int(math.log10(distance) / self.log_binsize)

def get_bin_range_string(self, bin_mid):
minval = int(round(10**(bin_mid - self.log_binsize/2)))
Expand All @@ -218,11 +218,9 @@ def get_bin_range_string(self, bin_mid):


def get_distance_and_orientation (line, cols):
"""return distance and orientation
given a list representing a line from the pairs input file and a ColIndices object
"""
"""return distance and orientation, given a list representing a line from the pairs input file and a ColIndices object """
distance = int(line[cols.pos2]) - int(line[cols.pos1])

# distance will always be > 0 for upper triangle, but in case it is not true.
if distance > 0:
orientation = str(line[cols.strand1]) + str(line[cols.strand2])
Expand Down Expand Up @@ -265,7 +263,7 @@ def distance_histogram (pairs_file, chromsize_file, outdir='report', cols=cols_p
"""create a log10-scale binned histogram table for read separation distance histogram
The histogram is stratified by read orientation (4 different orientations)
The table includes raw counts, log10 counts (pseudocounts added), contact probability, log10 contact probability, and proportions for orientation (pseudocounts added)
Bin is represented by the mid value at the log10 scale.
Bin is represented by the mid value at the log10 scale.
log_binsize: distance bin size in log10 scale.
"""
gs = GenomeSize(chromsize_file)
Expand Down Expand Up @@ -346,7 +344,7 @@ def distance_histogram (pairs_file, chromsize_file, outdir='report', cols=cols_p
cols = cols_merged_nodups
orientation_list = orientation_list_merged_nodups
elif args.input_type == 'OM':
cols = cols_old_merged_nodups
cols = cols_old_merged_nodups
orientation_list = orientation_list_merged_nodups
else:
print("Unknown input type"); exit(1)
Expand Down
2 changes: 1 addition & 1 deletion plot.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ rainbow_w_offset <- function(L, offset = NA){
}

library(devtools)
#install_url("https://github.com/SooLee/Soo.plot.base/archive/0.9.0.zip")
install_url("https://github.com/SooLee/Soo.plot.base/archive/0.9.0.zip")
library(Soo.plot.base)

exp_axis<-function(x, axis_ind, n=5){
Expand Down

0 comments on commit d77acc4

Please sign in to comment.