Skip to content

Commit

Permalink
change the consensus at the tip to the read's nucleotide during exten…
Browse files Browse the repository at this point in the history
…sion if it is only supported by one read.
  • Loading branch information
mourisl committed Oct 17, 2023
1 parent ee22eb6 commit acb6a4a
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
62 changes: 60 additions & 2 deletions SeqSet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3524,6 +3524,8 @@ class SeqSet
// Compute the new consensus.
if ( extendedOverlaps[0].readStart > 0 || extendedOverlaps[0].readEnd < len - 1 )
{
SimpleVector<struct _pair> consensusReplacement ;

char *newConsensus = (char *)malloc( sizeof( char ) * (
( extendedOverlaps[0].readStart + len - 1 -extendedOverlaps[0].readEnd ) + seq.consensusLen + 1 ) ) ;

Expand Down Expand Up @@ -3575,6 +3577,17 @@ class SeqSet
{
if ( i + shift >= len || r[i + shift] == 'N' )
continue ;
// If the current weight is 1, change the consensus to the newly input nucleotide
if (r[i + shift] != newConsensus[i + shift]
&& newConsensus[i + shift] != 'N' // The equal to N case will be handled later
&& seq.posWeight[i + shift].count[ nucToNum[newConsensus[i + shift] - 'A']] == 1)
{
struct _pair np ;
np.a = i + shift ;
np.b = (int)(r[i + shift]) ;
consensusReplacement.PushBack(np) ;
}

for ( j = 0 ; j < 4 ; ++j )
if ( r[i + shift] != numToNuc[j] && seq.posWeight[i + shift].count[j] > 1 )
--seq.posWeight[i + shift].count[j] ;
Expand All @@ -3590,11 +3603,24 @@ class SeqSet
for ( i = seq.consensusLen - 2 ; i < seq.consensusLen ; ++i )
{
int pos = i - extendedOverlaps[0].seqStart ;
int seqPos = i + shift ;
if ( pos < 0 || r[pos] == 'N' )
continue ;

// If the current weight is 1, change the consensus to the newly input nucleotide
if (r[pos] != newConsensus[seqPos]
&& newConsensus[seqPos] != 'N'
&& seq.posWeight[seqPos].count[ nucToNum[newConsensus[seqPos] - 'A']] == 1)
{
struct _pair np ;
np.a = seqPos ;
np.b = (int)r[pos] ;
consensusReplacement.PushBack(np) ;
}

for ( j = 0 ; j < 4 ; ++j )
if ( r[pos] != numToNuc[j] && seq.posWeight[i].count[j] > 1 )
--seq.posWeight[i].count[j] ;
if ( r[pos] != numToNuc[j] && seq.posWeight[seqPos].count[j] > 1 )
--seq.posWeight[seqPos].count[j] ;
}
}

Expand Down Expand Up @@ -3654,6 +3680,13 @@ class SeqSet
seq.consensus = newConsensus ;
seq.consensusLen = newConsensusLen ;
//printf( "new consensus len %d\n", seq.consensusLen ) ;

int size = consensusReplacement.Size() ;
for (i = 0 ; i < size ; ++i)
{
struct _pair &p = consensusReplacement[i] ;
SubstituteConsensusPos(seqIdx, p.a, (char)p.b, true) ;
}
}
else // the read is inside of the seq.
readInConsensusOffset = extendedOverlaps[0].seqStart ;
Expand Down Expand Up @@ -10045,6 +10078,31 @@ class SeqSet
ret += seqs[seqIdx].posWeight[i].count[ nucToNum[seqs[seqIdx].consensus[i] - 'A' ] ] ;
return ret ;
}

// Substitute one base of the consensus of seqs[seqIdx] with another nucleotide c.
//
// seqIdx: index into seqs of the sequence whose consensus is edited.
// pos: 0-based position in that consensus. Positions outside
//      [0, consensusLen) are ignored.
// c: the replacement character. Nothing happens if the consensus already holds c at pos.
// updateIndex: when true, seqIndex is kept in sync: the affected window is
//      passed to RemoveIndexFromRead before the edit and to BuildIndexFromRead after it.
void SubstituteConsensusPos(int seqIdx, int pos, char c, bool updateIndex)
{
	struct _seqWrapper &seq = seqs[seqIdx] ;
	// Reject negative positions as well as positions past the end; otherwise
	// seq.consensus[pos] below would be read and written out of bounds.
	if (pos < 0 || pos >= seq.consensusLen || seq.consensus[pos] == c)
		return ;

	// Every window of kmerLength characters that contains pos lies inside
	// [pos - kmerLength + 1, pos + kmerLength - 1]; clip that range to the consensus.
	KmerCode kmerCode( kmerLength ) ;
	int start = pos - kmerLength + 1 ;
	int end = pos + kmerLength - 1 ;
	if (start < 0)
		start = 0 ;
	if (end >= seq.consensusLen)
		end = seq.consensusLen - 1 ;

	// Drop the index entries derived from the old content of the window
	// while the old base is still in place.
	if (updateIndex)
		seqIndex.RemoveIndexFromRead(kmerCode, seq.consensus + start,
				end - start + 1, seqIdx, start) ;

	seq.consensus[pos] = c ;

	// Re-index the same window now that it holds the new base.
	if (updateIndex)
		seqIndex.BuildIndexFromRead(kmerCode, seq.consensus + start, end - start + 1, seqIdx, start) ;
}

void SetIsLongSeqSet( bool in )
{
Expand Down
9 changes: 8 additions & 1 deletion main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ int main( int argc, char *argv[] )
assembledReadIdx.push_back( rescueReadIdx[i] ) ;
}
#ifdef DEBUG
printf( "done\n" ) ;
printf( "rescue done\n" ) ;
#endif
}
seqSet.UpdateAllConsensus() ;
Expand Down Expand Up @@ -1523,6 +1523,13 @@ int main( int argc, char *argv[] )
if ( outputPrefix[0] != '-' )
fclose( fp ) ;

for ( i = 0 ; i < readCnt ; ++i )
{
free( sortedReads[i].id ) ;
free( sortedReads[i].read ) ;
if ( sortedReads[i].qual != NULL )
free( sortedReads[i].qual ) ;
}
return 0 ;
}

Expand Down
2 changes: 1 addition & 1 deletion run-trust4
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use Cwd 'cwd' ;
use Cwd 'abs_path' ;
use File::Basename ;

die "TRUST4 v1.0.12-r465 usage: ./run-trust4 [OPTIONS]:\n".
die "TRUST4 v1.0.13-r473 usage: ./run-trust4 [OPTIONS]:\n".
"Required:\n".
#"\t[Input]:\n".
"\t-b STRING: path to bam file\n".
Expand Down

0 comments on commit acb6a4a

Please sign in to comment.