Merge pull request #31 from jfnavarro/master

Merge from base
SpatialTranscriptomicsResearch · Feb 7, 2017 · 6f4354f · 6f4354f
2 parents 6a0b502 + 8a1b5bf
commit 6f4354f
Show file tree

Hide file tree

Showing 5 changed files with 23 additions and 24 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -384,5 +384,12 @@ Version 1.2.3
 * Added scripts to compute stats
 * Added new option for TaggD
 
+Version 1.2.4
+* Fixed small bugs
+* Small improvements in st_qa.py and convertEnsemblToNames.py
+
 Version 1.2.5
-* Changed the limit range of some parameters
+* Changed the limit range of some parameters
+
+Version 1.2.6
+* Take into account soft-clipped bases when computing start/end positions
diff --git a/scripts/convertEnsemblToNames.py b/scripts/convertEnsemblToNames.py
@@ -35,7 +35,7 @@ def main(st_data_file, names_map, output_file):
     adjustedList = list()
     for gene in st_data.columns:
         try:
-            gene = genes_map[gene]
+            gene = genes_map[gene.split(".")[0]]
         except KeyError:
             sys.stdout.write("Warning, {} was not found in the MAP file\n".format(gene))
         adjustedList.append(gene)

diff --git a/scripts/st_qa.py b/scripts/st_qa.py
@@ -18,34 +18,23 @@
 import matplotlib.mlab as mlab
 
 def histogram(x_points, output, title="Histogram", xlabel="X",
-              nbins=50, normed=1, color="blue", alpha=1.0):
-    """ This function generates a simple density histogram
+              ylabel="Y", nbins=50, color="blue"):
+    """ This function generates a simple histogram
     with the points given as input.
     :param x_points: a list of x coordinates
     :param title: the title for the plot
     :param xlabel: the name of the X label
+    :param ylabel: the name of the Y label
     :param output: the name/path of the output file
-    :param alpha: the alpha transparency level for the histogram
     :param nbins: the number of bins for the histogram
-    :param normed: the normalization factor
     :param color: the color for the histogram
     """
+    # Create the plot
     fig = plt.figure()
-
-    # the histogram of the data
-    n, bins, patches = plt.hist(x_points, bins=nbins, 
-                                normed=normed, facecolor=color, alpha=alpha)
-
-    mean = np.mean(x_points)
-    std_dev = np.std(x_points)
-    # add a 'best fit' line
-    y = mlab.normpdf(bins, mean, std_dev)
-    plt.plot(bins, y, 'r--', linewidth=1)
-    # generate plot
+    plt.hist(x_points, bins=nbins, facecolor=color)
     plt.xlabel(xlabel)
-    plt.ylabel("Probability")
+    plt.ylabel(ylabel)
     plt.title(title)
-
     # Tweak spacing to prevent clipping of ylabel
     plt.subplots_adjust(left=0.15)
     fig.set_size_inches(16, 16)
@@ -70,8 +59,10 @@ def main(input_data):
     average_genes_feature = np.mean(aggregated_gene_counts)
     std_reads_feature = np.std(aggregated_spot_counts)
     std_genes_feature = np.std(aggregated_gene_counts)
-    histogram(aggregated_spot_counts, nbins=20, output="histogram_counts.png", title="Reads per feature")
-    histogram(aggregated_gene_counts, nbins=20, output="histogram_genes.png", title="Genes per feature")
+    histogram(aggregated_spot_counts, nbins=20, xlabel="#Transcripts", ylabel="#Features",
+              output="hist_counts.png", title="Transcripts per feature")
+    histogram(aggregated_gene_counts, nbins=20, xlabel="#Genes", ylabel="#Features", 
+              output="hist_genes.png", title="Genes per feature")
     print("Number of features: {}".format(total_barcodes))
     print("Number of unique molecules present: {}".format(total_transcripts))
     print("Number of unique genes present: {}".format(number_genes))

diff --git a/stpipeline/common/sam_utils.py b/stpipeline/common/sam_utils.py
@@ -31,8 +31,9 @@ def parseUniqueEvents(filename):
     for rec in sam_file.fetch(until_eof=True):
         clear_name = rec.query_name
         mapping_quality = rec.mapping_quality
-        start = rec.reference_start
-        end = rec.reference_end
+        # Account for soft-clipped bases when retrieving the start/end coordinates
+        start = rec.reference_start - rec.query_alignment_start
+        end = rec.reference_end + (rec.query_length - rec.query_alignment_end)
         chrom = sam_file.getrname(rec.reference_id)
         strand = "-" if rec.is_reverse else "+"
         # Get TAGGD tags

diff --git a/stpipeline/version.py b/stpipeline/version.py
@@ -1 +1 @@
-version_number = "1.2.5"
+version_number = "1.2.6"