-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
making changes to reduce size of giant interval lists #1309
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,9 +23,13 @@ | |
*/ | ||
package htsjdk.samtools.util; | ||
|
||
import htsjdk.utils.ValidationUtils; | ||
|
||
import java.util.ConcurrentModificationException; | ||
import java.util.Iterator; | ||
import java.util.NoSuchElementException; | ||
import java.util.Objects; | ||
import java.util.function.BiFunction; | ||
|
||
/** | ||
* A Red-Black tree with intervals for keys. | ||
|
@@ -56,6 +60,8 @@ public void clear() | |
mRoot = null; | ||
} | ||
|
||
|
||
|
||
/** | ||
* Put a new interval into the tree (or update the value associated with an existing interval). | ||
* If the interval is novel, the special sentinel value is returned. | ||
|
@@ -64,7 +70,6 @@ public void clear() | |
* @param value The associated value. | ||
* @return The old value associated with that interval, or the sentinel. | ||
*/ | ||
@SuppressWarnings("null") | ||
public V put( final int start, final int end, final V value ) | ||
{ | ||
if ( start > end ) | ||
|
@@ -114,6 +119,34 @@ public V put( final int start, final int end, final V value ) | |
return result; | ||
} | ||
|
||
/** | ||
* If the specified start and end positions are not already associated with a value or are | ||
* associated with the sentinel (see {@link #getSentinel()}), associates it with the given (non-sentinel) value. | ||
* Otherwise, replaces the associated value with the results of the given | ||
* remapping function, or removes the interval if the result is equal to the sentinel value. This | ||
* method may be of use when combining multiple values that have the same start and end position. | ||
* | ||
* @param start interval start position | ||
* @param end interval end position | ||
* @param value value to merge into the tree, must not be equal to the sentinel value | ||
* @param remappingFunction a function that merges the new value with the existing value for the same start and end position | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note that if function returns null the region will be left empty There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
* @return the updated value that is now stored in the tree or sentinel | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went with a slight modification:
|
||
*/ | ||
public V merge(int start, int end, V value, BiFunction<? super V, ? super V, ? extends V> remappingFunction) { | ||
ValidationUtils.validateArg(!Objects.equals(value, mSentinel), "Values equal to the sentinel value may not be merged"); | ||
final V alreadyPresent = put(start, end, value); | ||
if (!Objects.equals(alreadyPresent, mSentinel)) { | ||
final V newComputedValue = remappingFunction.apply(value, alreadyPresent); | ||
if(Objects.equals(newComputedValue, mSentinel)){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. spaces There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
remove(start, end); | ||
} else { | ||
put(start, end, newComputedValue); | ||
} | ||
return newComputedValue; | ||
} | ||
return value; | ||
} | ||
|
||
/** | ||
* Remove an interval from the tree. If the interval does not exist in the tree the | ||
* special sentinel value is returned. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
package htsjdk.samtools.util; | ||
|
||
import java.util.*; | ||
import java.util.function.BiFunction; | ||
|
||
/** | ||
* Utility class to efficiently do in memory overlap detection between a large | ||
|
@@ -80,25 +81,35 @@ public void addLhs(final T object, final Locatable interval) { | |
} | ||
|
||
final int start = interval.getStart() + this.lhsBuffer; | ||
final int end = interval.getEnd() - this.lhsBuffer; | ||
final int end = interval.getEnd() - this.lhsBuffer; | ||
|
||
final Set<T> newValue = Collections.singleton(object); | ||
if (start <= end) { // Don't put in sequences that have no overlappable bases | ||
final Set<T> alreadyThere = tree.put(start, end, newValue); | ||
if (alreadyThere != null) { | ||
if( alreadyThere.size() == 1){ | ||
Set<T> mutableSet = new HashSet<>(2); | ||
mutableSet.addAll(alreadyThere); | ||
mutableSet.add(object); | ||
tree.put(start, end, mutableSet); | ||
} else { | ||
alreadyThere.add(object); | ||
tree.put(start, end, alreadyThere); | ||
} | ||
} | ||
tree.merge(start, end, | ||
Collections.singleton(object), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. weird spacing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
mergeSetsAccountingForSingletons()); | ||
} | ||
} | ||
|
||
/** | ||
* Merge two Sets; assumes sets of size 1 are immutable. | ||
*/ | ||
private static <T> BiFunction<Set<T>, Set<T>, Set<T>> mergeSetsAccountingForSingletons() { | ||
return (newValue, oldValue) -> { | ||
// Sets of size 1 are immutable SingletonSets so we have to make a new | ||
// mutable one to add to | ||
if (oldValue.size() == 1) { | ||
Set<T> newMutableSet = new HashSet<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can replace entire if statement with: final Set<T> newMutableSet = oldValue.size() == 1 ? new HashSet<>() : oldValue;
newMutableSet.addAll(oldValue);
newMutableSet.addAll(newValue);
return newMutableSet; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh right, clever, it's a set so you can unconditionally add the same thing over again... |
||
newMutableSet.addAll(oldValue); | ||
newMutableSet.addAll(newValue); | ||
return newMutableSet; | ||
// otherwise it's already a HashSet and we can just add values to it | ||
} else { | ||
oldValue.addAll(newValue); | ||
return oldValue; | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* Adds all items to the overlap detector. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
extra nls
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done