Skip to content

Commit

Permalink
resolving my own comments
Browse files Browse the repository at this point in the history
  • Loading branch information
lbergelson committed Feb 14, 2019
1 parent 50b98a9 commit a9065a3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 30 deletions.
21 changes: 13 additions & 8 deletions src/main/java/htsjdk/variant/variantcontext/Allele.java
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ public static boolean wouldBeNoCallAllele(final byte[] bases) {

/**
* @param bases bases representing an allele
* @return true if the bases represent a symbolic allele
* @return true if the bases represent a symbolic allele, including breakpoints and breakends
*/
public static boolean wouldBeSymbolicAllele(final byte[] bases) {
if ( bases.length <= 1 )
Expand All @@ -305,16 +305,18 @@ public static boolean wouldBeSymbolicAllele(final byte[] bases) {
wouldBeSingleBreakend(bases);
}
}

/**
* @param bases bases representing an allele
* @return true if the bases represent a symbolic allele in breakpoint notation
* @return true if the bases represent a symbolic allele in breakpoint notation (ex: G]17:198982] or ]13:123456]T)
*/
public static boolean wouldBeBreakpoint(final byte[] bases) {
if ( bases.length <= 1 )
return false;
else {
for (int i = 0; i < bases.length; i++) {
if (bases[i] == '[' || bases[i] == ']') {
final byte base = bases[i];
if (base == '[' || base == ']') {
return true;
}
}
Expand All @@ -323,7 +325,7 @@ public static boolean wouldBeBreakpoint(final byte[] bases) {
}
/**
* @param bases bases representing an allele
* @return true if the bases represent a symbolic allele in single breakend notation
* @return true if the bases represent a symbolic allele in single breakend notation (ex: .A or A.)
*/
public static boolean wouldBeSingleBreakend(final byte[] bases) {
if ( bases.length <= 1 )
Expand Down Expand Up @@ -434,21 +436,24 @@ public static Allele create(final Allele allele, final boolean ignoreRefState) {
//
// ---------------------------------------------------------------------------------------------------------

/**
 * Reports whether this allele is the NO_CALL allele.
 *
 * @return true if this is the NO_CALL allele
 */
public boolean isNoCall() {
    return isNoCall;
}
/**
 * Reports whether this allele is anything other than the NO_CALL allele.
 *
 * @return true if this is not the NO_CALL allele
 */
public boolean isCalled() {
    final boolean noCall = isNoCall();
    return !noCall;
}

/**
 * Reports whether this allele is the reference allele.
 *
 * @return true if this Allele is the reference allele
 */
public boolean isReference() {
    return isRef;
}
/**
 * Reports whether this allele is not the reference allele.
 *
 * @return true if this Allele is not the reference allele
 */
public boolean isNonReference() {
    final boolean reference = isReference();
    return !reference;
}

/**
 * Reports whether this allele is symbolic, i.e. has no well-defined base sequence.
 * Breakpoints and breakends are included.
 *
 * @return true if this Allele is symbolic
 */
public boolean isSymbolic() {
    return isSymbolic;
}

/**
 * Reports whether this allele is written in breakpoint notation
 * (ex: G]17:198982] or ]13:123456]T), as decided by {@code wouldBeBreakpoint} on its bases.
 *
 * @return true if this Allele is a breakpoint
 */
public boolean isBreakpoint() {
    return wouldBeBreakpoint(bases);
}

/**
 * Reports whether this allele is written in single breakend notation
 * (ex: .A or A.), as decided by {@code wouldBeSingleBreakend} on its bases.
 *
 * @return true if this Allele is a single breakend
 */
public boolean isSingleBreakend() {
    return wouldBeSingleBreakend(bases);
}

// Returns a nice string representation of this object
Expand Down
25 changes: 3 additions & 22 deletions src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>

protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th

private final static int MAX_ALLELE_LENGTH_FOR_CACHING = 8;

// we have to store the list of strings that make up the header until they're needed
protected VCFHeader header = null;
protected VCFHeaderVersion version = null;
Expand Down Expand Up @@ -334,18 +332,12 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
else
builder.id(parts[2]);

final String ref = getCachedString(parts[3].toUpperCase());
final String ref = parts[3].toUpperCase();
String alts = parts[4];
if (!(alts.contains(".") || alts.length() > MAX_ALLELE_LENGTH_FOR_CACHING || (alts.length() > 1 && (alts.contains("[") || alts.contains("]") || alts.contains("."))))) {
// don't cache multiple alt allele which we're going to strsplit anyway
// don't cache long alleles as they're probably unique
// don't cache breakpoint/single breakend alleles as they're probably unique
alts = getCachedString(alts);
}
builder.log10PError(parseQual(parts[5]));

final List<String> filters = parseFilters(getCachedString(parts[6]));
if ( filters != null ) builder.filters(new HashSet<String>(filters));
if ( filters != null ) builder.filters(new HashSet<>(filters));
final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs);

Expand Down Expand Up @@ -585,7 +577,7 @@ private static void checkAllele(String allele, boolean isRef, int lineNo) {
System.err.println(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
}

if ( isSymbolicAllele(allele) ) {
if (Allele.wouldBeSymbolicAllele(allele.getBytes())) {
if ( isRef ) {
generateException("Symbolic alleles not allowed as reference allele: " + allele, lineNo);
}
Expand Down Expand Up @@ -617,17 +609,6 @@ private static String generateExceptionTextForBadAlleleBases(final String allele
return "unparsable vcf record with allele " + allele;
}

/**
 * Returns true if this is a symbolic allele (e.g. {@code <SOMETAG>}),
 * structural variation breakend (with [ or ]), or single breakend (e.g. ATTA.),
 * otherwise false.
 * <p>
 * The allele text is converted to bytes with an explicit charset so the result
 * does not depend on the platform default encoding.
 *
 * @param allele the allele to check
 * @return true if the allele is a symbolic allele, otherwise false
 */
private static boolean isSymbolicAllele(String allele) {
    // Fully-qualified name keeps this block independent of the file's import list.
    final byte[] alleleBytes = allele.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    return Allele.wouldBeSymbolicAllele(alleleBytes);
}

/**
* parse a single allele, given the allele list
* @param alleles the alleles available
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ public void testWouldBeSymbolic() {
}
@Test
public void testWouldBeBreakpoint() {
Assert.assertTrue(Allele.wouldBeBreakpoint("G]17:198982]".getBytes()));
Assert.assertTrue(Allele.wouldBeBreakpoint("]13:123456]T".getBytes()));
Assert.assertFalse(Allele.wouldBeBreakpoint("<DEL>".getBytes()));
Assert.assertTrue(Allele.wouldBeBreakpoint("AAAAAA[chr1:1234[".getBytes()));
Assert.assertTrue(Allele.wouldBeBreakpoint("AAAAAA]chr1:1234]".getBytes()));
Expand All @@ -299,6 +301,8 @@ public void testWouldBeBreakpoint() {
}
@Test
public void testWouldBeBreakend() {
Assert.assertFalse(Allele.wouldBeSingleBreakend("G]17:198982]".getBytes()));
Assert.assertFalse(Allele.wouldBeSingleBreakend("]13:123456]T".getBytes()));
Assert.assertFalse(Allele.wouldBeSingleBreakend("<DEL>".getBytes()));
Assert.assertFalse(Allele.wouldBeSingleBreakend("AAAAAA[chr1:1234[".getBytes()));
Assert.assertFalse(Allele.wouldBeSingleBreakend("AAAAAA]chr1:1234]".getBytes()));
Expand Down

0 comments on commit a9065a3

Please sign in to comment.