Skip to content

Commit

Permalink
Add the cell_id information to the smartseq output.
Browse files Browse the repository at this point in the history
  • Loading branch information
mourisl committed Apr 8, 2024
1 parent 73d5062 commit 735b0d1
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ TRUST4 is also available from [Bioconda](https://anaconda.org/bioconda/trust4).
-t INT: number of threads (default: 1)
-k INT: the starting k-mer size for indexing contigs (default: 9)
--barcode STRING: if -b, bam field for barcode; if -1 -2/-u, file containing barcodes (default: not used)
--barcodeLevel STRING: barcode is for cell or molecule (default: cell)
--barcodeWhitelist STRING: path to the barcode whitelist (default: not used)
--barcodeTranslate STRING: path to the barcode translate file (default: not used)
--UMI STRING: if -b, bam field for 10x Genomics-like UMI; if -1 -2/-u, file containing 10x Genomics-like UMIs (default: not used)
--readFormat STRING: format for read, barcode and UMI files (example: r1:0:-1,r2:0:-1,bc:0:15,um:16:-1 for paired-end files with barcode and UMI)
--repseq: the data is from TCR-seq or BCR-seq (default: not set)
--barcodeLevel STRING: barcode is for cell or molecule (default: cell)
--minHitLen INT: the minimal hit length for a valid overlap (default: auto)
--mateIdSuffixLen INT: the suffix length in read id for mate. (default: not used)
--skipMateExtension: do not extend assemblies with mate information, useful for SMART-seq (default: not used)
Expand Down
4 changes: 2 additions & 2 deletions run-trust4
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use Cwd qw(cwd abs_path) ;
use File::Basename ;
use File::Path qw(make_path) ;

die "TRUST4 v1.1.0-r499 usage: ./run-trust4 [OPTIONS]:\n".
die "TRUST4 v1.1.0-r500 usage: ./run-trust4 [OPTIONS]:\n".
"Required:\n".
#"\t[Input]:\n".
"\t-b STRING: path to bam file\n".
Expand All @@ -22,6 +22,7 @@ die "TRUST4 v1.1.0-r499 usage: ./run-trust4 [OPTIONS]:\n".
"\t-k INT: the starting k-mer size for indexing contigs (default: 9)\n".
#"\t-h: print help message and exit.\n"
"\t--barcode STRING: if -b, bam field for barcode; if -1 -2/-u, file containing barcodes (default: not used)\n".
"\t--barcodeLevel STRING: barcode is for cell or molecule (default: cell)\n".
#"\t--barcodeRange INT INT CHAR: start, end(-1 for length-1), strand in a barcode is the true barcode (default: 0 -1 +)\n".
"\t--barcodeWhitelist STRING: path to the barcode whitelist (default: not used)\n".
"\t--barcodeTranslate STRING: path to the barcode translate file (default: not used)\n".
Expand All @@ -31,7 +32,6 @@ die "TRUST4 v1.1.0-r499 usage: ./run-trust4 [OPTIONS]:\n".
#"\t--umiRange INT INT CHAR: start, end(-1 for length-1), strand in a UMI is the true UMI (default: 0 -1 +)\n".
"\t--readFormat STRING: format for read, barcode and UMI files (example: r1:0:-1,r2:0:-1,bc:0:15,um:16:-1 for paired-end files with barcode and UMI)\n".
"\t--repseq: the data is from TCR-seq or BCR-seq (default: not set)\n".
"\t--barcodeLevel STRING: barcode is for cell or molecule (default: cell)\n".
"\t--minHitLen INT: the minimal hit length for a valid overlap (default: auto)\n".
"\t--mateIdSuffixLen INT: the suffix length in read id for mate. (default: not used)\n".
"\t--skipMateExtension: do not extend assemblies with mate information, useful for SMART-seq (default: not used)\n".
Expand Down
13 changes: 13 additions & 0 deletions trust-smartseq.pl
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,22 @@ sub GetPairChainType
# Process the AIRR file
open FPairr, "tmp_smartseq_airr.tsv" ;
my $lineCnt = 0 ;
my %airrNameToCol ;
while (<FPairr>)
{
chomp ;
if ($cellProcessed == 0 && $lineCnt == 0)
{
print FPfinalairr $_,"\n" ;

chomp ;
my @cols = split /\t/ ;
for ($i = 0 ; $i < scalar(@cols) ; ++$i)
{
$airrNameToCol{ $cols[$i] } = $i ;
}
}

if ($lineCnt == 0)
{
++$lineCnt ;
Expand All @@ -254,6 +263,10 @@ sub GetPairChainType
if ($matchedCols[2] eq $cols[13])
{
$cols[0] = ${cellPrefix}."_".$cols[0] ;

# Add the cell id
$cols[ $airrNameToCol{"cell_id"} ] = $cellPrefix ;

print FPfinalairr join("\t", @cols), "\n" ;
}
}
Expand Down

0 comments on commit 735b0d1

Please sign in to comment.