Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Oct 20, 2020
2 parents dd56eed + 6cdaa6a commit cfbed3d
Show file tree
Hide file tree
Showing 46 changed files with 919 additions and 247 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public static void main(@NotNull String[] args) throws ParseException, IOExcepti
}

LOGGER.info("Loading shallow seq runs from {}", cmd.getOptionValue(RUNS_DIRECTORY));
List<RunContext> runContexts = loadRunContexts(cmd.getOptionValue(RUNS_DIRECTORY));
List<RunContext> runContexts = loadRunContexts(cmd.getOptionValue(RUNS_DIRECTORY), cmd.getOptionValue(PIPELINE_VERSION));

List<LimsShallowSeqData> newShallowSeqEntries = extractNewEntriesForShallowDbFromRunContexts(runContexts,
cmd.getOptionValue(SHALLOW_SEQ_TSV),
Expand Down Expand Up @@ -139,8 +139,8 @@ private static List<LimsShallowSeqData> read(@NotNull String shallowSeqTsv) thro
}

@NotNull
private static List<RunContext> loadRunContexts(@NotNull String runsDirectory) throws IOException {
List<RunContext> runContexts = RunsFolderReader.extractRunContexts(new File(runsDirectory));
private static List<RunContext> loadRunContexts(@NotNull String runsDirectory, @NotNull String pipelineVersionFile) throws IOException {
List<RunContext> runContexts = RunsFolderReader.extractRunContexts(new File(runsDirectory), pipelineVersionFile);
LOGGER.info(" Loaded run contexts from {} ({} sets)", runsDirectory, runContexts.size());

return runContexts;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public final class LoadClinicalData {

private static final Logger LOGGER = LogManager.getLogger(LoadClinicalData.class);
private static final String VERSION = LoadClinicalData.class.getPackage().getImplementationVersion();
private static final String PIPELINE_VERSION = "pipeline_version_file";

private static final String RUNS_DIRECTORY = "runs_dir";

Expand Down Expand Up @@ -112,7 +113,7 @@ public static void main(@NotNull String[] args) throws ParseException, IOExcepti
TreatmentCurator treatmentCurator = new TreatmentCurator(cmd.getOptionValue(TREATMENT_MAPPING_CSV));

LOGGER.info("Loading sequence runs from {}", cmd.getOptionValue(RUNS_DIRECTORY));
List<RunContext> runContexts = loadRunContexts(cmd.getOptionValue(RUNS_DIRECTORY));
List<RunContext> runContexts = loadRunContexts(cmd.getOptionValue(RUNS_DIRECTORY), cmd.getOptionValue(PIPELINE_VERSION));
Map<String, List<String>> sequencedSamplesPerPatient = extractSequencedSamplesFromRunContexts(runContexts);
Map<String, String> sampleToSetNameMap = extractSampleToSetNameMap(runContexts);
Set<String> sequencedPatientIds = sequencedSamplesPerPatient.keySet();
Expand Down Expand Up @@ -162,8 +163,8 @@ public static void main(@NotNull String[] args) throws ParseException, IOExcepti
}

@NotNull
private static List<RunContext> loadRunContexts(@NotNull String runsDirectory) throws IOException {
List<RunContext> runContexts = RunsFolderReader.extractRunContexts(new File(runsDirectory));
private static List<RunContext> loadRunContexts(@NotNull String runsDirectory, @NotNull String pipelineVersion) throws IOException {
List<RunContext> runContexts = RunsFolderReader.extractRunContexts(new File(runsDirectory), pipelineVersion);
LOGGER.info(" Loaded run contexts from {} ({} sets)", runsDirectory, runContexts.size());

return runContexts;
Expand Down Expand Up @@ -618,6 +619,8 @@ private static Options createOptions() {
options.addOption(TREATMENT_MAPPING_CSV, true, "Path towards to the CSV of mapping the treatments.");
options.addOption(BIOPSY_MAPPING_CSV, true, "Path towards to the CSV of mapping of biopsies.");

options.addOption(PIPELINE_VERSION, true, "Path towards the pipeline version");

addDatabaseCmdLineArgs(options);
return options;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonElement;
import com.google.gson.JsonNull;
import com.google.gson.JsonObject;
import com.hartwig.hmftools.common.utils.io.exception.EmptyFileException;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand All @@ -29,6 +33,7 @@ final class MetaDataResolver {
private static final String REF_SAMPLE_OBJECT_P5 = "reference";
private static final String TUMOR_SAMPLE_OBJECT_P5 = "tumor";
private static final String SET_NAME_FIELD_P5 = "runName";
private static final String SET_NAME_FIELD_POST_P5_15 = "set";

private static final String BARCODE_START = "FR";
private static final String BARCODE_START_OLD = "HMF";
Expand All @@ -39,7 +44,7 @@ private MetaDataResolver() {
}

@Nullable
static RunContext fromMetaDataFile(@NotNull String runDirectory) {
static RunContext fromMetaDataFile(@NotNull String runDirectory, @NotNull String pipelineVersionFile) throws IOException {
File metaDataFileP4 = new File(runDirectory + File.separator + METADATA_FILE_P4);
File metaDataFileP5 = new File(runDirectory + File.separator + METADATA_FILE_P5);

Expand All @@ -52,7 +57,7 @@ static RunContext fromMetaDataFile(@NotNull String runDirectory) {
}
} else if (metaDataFileP5.exists()) {
try {
return fromPv5MetaData(runDirectory, metaDataFileP5);
return fromPv5MetaData(runDirectory, metaDataFileP5, new File(runDirectory + File.separator + pipelineVersionFile));
} catch (FileNotFoundException exception) {
LOGGER.warn("Could not find meta data file '{}' for run dir '{}'.", METADATA_FILE_P5, runDirectory);
return null;
Expand Down Expand Up @@ -100,13 +105,23 @@ private static RunContext fromPv4MetaData(@NotNull String runDirectory, @NotNull
}

@Nullable
private static RunContext fromPv5MetaData(@NotNull String runDirectory, @NotNull File pv5MetadataFile) throws FileNotFoundException {
private static RunContext fromPv5MetaData(@NotNull String runDirectory, @NotNull File pv5MetadataFile,
@NotNull File pipelineVersionFile) throws IOException {
JsonObject json = GSON.fromJson(new FileReader(pv5MetadataFile), JsonObject.class);

String pipelineVersion = readPipelineVersion(pipelineVersionFile);

String refSample = sampleIdP5(json, REF_SAMPLE_OBJECT_P5);
String tumorSample = sampleIdP5(json, TUMOR_SAMPLE_OBJECT_P5);
String tumorBarcodeSample = sampleBarcodeP5(json, TUMOR_SAMPLE_OBJECT_P5);
String setName = fieldValue(json, SET_NAME_FIELD_P5);
String tumorBarcodeSample = sampleBarcodeP5(json, TUMOR_SAMPLE_OBJECT_P5, pipelineVersion);

String setName;
if (pipelineVersion.substring(2, 4).matches("[0-9]+") && Integer.valueOf(pipelineVersion.substring(2, 4)) >= 15) {
setName = fieldValue(json, SET_NAME_FIELD_POST_P5_15);
} else {
// this is pre 5.15 pipelines
setName = fieldValue(json, SET_NAME_FIELD_P5);
}

if (refSample == null) {
LOGGER.warn("Could not find ref sample id in metadata object '{}'!", REF_SAMPLE_OBJECT_P5);
Expand Down Expand Up @@ -142,12 +157,35 @@ private static String sampleIdP5(@NotNull JsonObject metadata, @NotNull String o
}

@Nullable
private static String sampleBarcodeP5(@NotNull JsonObject metadata, @NotNull String objectName) {
private static String sampleBarcodeP5(@NotNull JsonObject metadata, @NotNull String objectName, @NotNull String pipelineVersion) {
JsonObject object = metadata.getAsJsonObject(objectName);
if (object == null) {
return null;
}
JsonElement sampleBarcodeId = object.get("sampleId");
JsonElement sampleBarcodeId;
if (pipelineVersion.substring(2, 4).matches("[0-9]+") && Integer.valueOf(pipelineVersion.substring(2, 4)) >= 15) {
sampleBarcodeId = object.get("barcode");
} else {
// this is pre 5.15 pipelines
sampleBarcodeId = object.get("sampleId");
}
return sampleBarcodeId != null && !(sampleBarcodeId instanceof JsonNull) ? sampleBarcodeId.getAsString() : null;
}

/**
 * Reads the pipeline version from a file that is expected to contain exactly one line.
 *
 * @param pipelineVersionFile file holding the pipeline version string
 * @return the single line of the file, or an empty string when the file holds more than one line
 * @throws EmptyFileException when the file contains no lines at all
 * @throws IOException when the file cannot be read
 */
@NotNull
private static String readPipelineVersion(@NotNull File pipelineVersionFile) throws IOException {
    List<String> lines = Files.readAllLines(pipelineVersionFile.toPath());
    if (lines.isEmpty()) {
        throw new EmptyFileException(pipelineVersionFile.toString());
    }

    if (lines.size() > 1) {
        // NOTE(review): callers in this file apply substring(2, 4) to the returned value, which throws on an
        // empty string. Consider failing here with an IOException instead of returning the empty fallback.
        LOGGER.warn("Pipeline version file '{}' contains {} lines but exactly 1 was expected!",
                pipelineVersionFile,
                lines.size());
        return Strings.EMPTY;
    }

    return lines.get(0);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ private ProductionRunContextFactory() {
}

@NotNull
public static RunContext fromRunDirectory(@NotNull String runDirectory) throws IOException {
RunContext runContextFromMetaData = MetaDataResolver.fromMetaDataFile(runDirectory);
public static RunContext fromRunDirectory(@NotNull String runDirectory, @NotNull String pipelineVersionFile) throws IOException {
RunContext runContextFromMetaData = MetaDataResolver.fromMetaDataFile(runDirectory, pipelineVersionFile);
if (runContextFromMetaData == null) {
throw new IOException("Could not resolve run context from meta data for " + runDirectory);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ private RunsFolderReader() {
}

@NotNull
public static List<RunContext> extractRunContexts(@NotNull File dir) throws IOException {
public static List<RunContext> extractRunContexts(@NotNull File dir, @NotNull String pipelineVersionFile) throws IOException {
List<RunContext> runContexts = Lists.newArrayList();
File[] folders = dir.listFiles(File::isDirectory);
if (folders == null) {
Expand All @@ -29,7 +29,7 @@ public static List<RunContext> extractRunContexts(@NotNull File dir) throws IOEx

for (File folder : folders) {
if (folder.exists() && folder.isDirectory()) {
runContexts.add(ProductionRunContextFactory.fromRunDirectory(folder.getPath()));
runContexts.add(ProductionRunContextFactory.fromRunDirectory(folder.getPath(), pipelineVersionFile));
} else {
LOGGER.warn("Could not process run since file '{}' doesn't seem to be a folder", folder.getPath());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static org.junit.Assert.assertNull;

import java.io.File;
import java.io.IOException;

import com.google.common.io.Resources;

Expand All @@ -16,34 +17,34 @@ public class MetaDataResolverTest {
private static final String RESOURCE_DIR = Resources.getResource("context").getPath();

@Test
public void noMetaDataReturnsNull() {
public void noMetaDataReturnsNull() throws IOException {
String noMetaDataRunDir = RESOURCE_DIR + File.separator + "RunDirNoMetaData";
assertNull(MetaDataResolver.fromMetaDataFile(noMetaDataRunDir));
assertNull(MetaDataResolver.fromMetaDataFile(noMetaDataRunDir, ""));
}

@Test
public void noRefSampleReturnsNull() {
public void noRefSampleReturnsNull() throws IOException{
String noRefSampleRunDir = RESOURCE_DIR + File.separator + "RunDirNoRefSample";
assertNull(MetaDataResolver.fromMetaDataFile(noRefSampleRunDir));
assertNull(MetaDataResolver.fromMetaDataFile(noRefSampleRunDir, ""));
}

@Test
public void noSetNameReturnsNull() {
public void noSetNameReturnsNull() throws IOException{
String noSetNameRunDir = RESOURCE_DIR + File.separator + "RunDirNoSetName";
assertNull(MetaDataResolver.fromMetaDataFile(noSetNameRunDir));
assertNull(MetaDataResolver.fromMetaDataFile(noSetNameRunDir, ""));
}

@Test
public void canResolveMetaDataFilePV5() {
public void canResolveMetaDataFilePV5() throws IOException{
String noSetNameRunDir = RESOURCE_DIR + File.separator + "RunDirP5";
assertNotNull(MetaDataResolver.fromMetaDataFile(noSetNameRunDir));
assertNotNull(MetaDataResolver.fromMetaDataFile(noSetNameRunDir, "pipeline.version"));
}

@Test
public void canResolveSomaticMetaDataP4() {
public void canResolveSomaticMetaDataP4() throws IOException{
String setName = "RunDirSomatic";
String runDirectory = RESOURCE_DIR + File.separator + setName;
RunContext runContext = MetaDataResolver.fromMetaDataFile(runDirectory);
RunContext runContext = MetaDataResolver.fromMetaDataFile(runDirectory, "");

assertNotNull(runContext);
assertEquals("CPCT12345678R", runContext.refSample());
Expand All @@ -54,10 +55,24 @@ public void canResolveSomaticMetaDataP4() {
}

@Test
public void canResolveSomaticMetaDataP5() {
public void canResolveSomaticMetaDataP5() throws IOException{
String setName = "RunDirP5";
String runDirectory = RESOURCE_DIR + File.separator + setName;
RunContext runContext = MetaDataResolver.fromMetaDataFile(runDirectory);
RunContext runContext = MetaDataResolver.fromMetaDataFile(runDirectory, "pipeline.version");

assertNotNull(runContext);
assertEquals("CPCT12345678R", runContext.refSample());
assertEquals("CPCT12345678T", runContext.tumorSample());
assertEquals(setName, runContext.setName());
assertEquals(runDirectory, runContext.runDirectory());
assertEquals("AB456", runContext.tumorBarcodeSample());
}

@Test
public void canResolveSomaticMetaDataPostP5_15() throws IOException{
String setName = "RunDirPostP5_15";
String runDirectory = RESOURCE_DIR + File.separator + setName;
RunContext runContext = MetaDataResolver.fromMetaDataFile(runDirectory, "pipeline.version");

assertNotNull(runContext);
assertEquals("CPCT12345678R", runContext.refSample());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ public class ProductionRunContextFactoryTest {
@Test
public void picksMetadataWhenAvailable() throws IOException {
String runDirectory = RESOURCE_DIR + File.separator + "RunDirSomatic";
assertNotNull(ProductionRunContextFactory.fromRunDirectory(runDirectory));
assertNotNull(ProductionRunContextFactory.fromRunDirectory(runDirectory, ""));
}

@Test(expected = IOException.class)
public void throwExceptionWhenNoMetaData() throws IOException {
String runDirectory = RESOURCE_DIR + File.separator + "DoesNotExist";
assertNotNull(ProductionRunContextFactory.fromRunDirectory(runDirectory));
assertNotNull(ProductionRunContextFactory.fromRunDirectory(runDirectory, ""));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5.7.123
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"reference": {
"sampleName": "CPCT12345678R",
"barcode": "AB984",
"type": "REFERENCE",
"id": "123",
"bucket": "output-bucket",
"set": "RunDirPostP5_15"
},
"tumor": {
"sampleName": "CPCT12345678T",
"barcode": "AB456",
"type": "TUMOR",
"id": "123",
"bucket": "output-bucket",
"set": "RunDirPostP5_15"
},
"id": "123",
"bucket": "output-bucket",
"set": "RunDirPostP5_15"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5.15.567
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public static Candidate toCandidate(@NotNull final VariantContext context, @NotN
}

@NotNull
static IndexedBases readBases(@NotNull final VariantContext context) {
public static IndexedBases readBases(@NotNull final VariantContext context) {
final int position = context.getStart();

final String leftFlank = context.getAttributeAsString(READ_CONTEXT_LEFT_FLANK, Strings.EMPTY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static com.hartwig.hmftools.common.cli.Configs.defaultStringValue;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
Expand Down Expand Up @@ -189,7 +190,7 @@ default int maxSkippedReferenceRegions() {

@NotNull
static SageConfig createConfig(boolean appendMode, @NotNull final String version, @NotNull final CommandLine cmd)
throws ParseException {
throws ParseException, IOException {
final int threads = defaultIntValue(cmd, THREADS, DEFAULT_THREADS);
final String assembly = cmd.getOptionValue(ASSEMBLY, "UNKNOWN");

Expand Down Expand Up @@ -277,6 +278,11 @@ static SageConfig createConfig(boolean appendMode, @NotNull final String version
transcripts = assembly.equals("hg19") ? HmfGenePanelSupplier.allGeneList37() : HmfGenePanelSupplier.allGeneList38();
}

final File outputDir = new File(outputVcf).getParentFile();
if (outputDir != null && !outputDir.exists() && !outputDir.mkdirs()) {
throw new IOException("Unable to write directory " + outputDir.toString());
}

return ImmutableSageConfig.builder()
.version(version)
.transcriptRegions(transcripts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import static com.hartwig.hmftools.sage.phase.Phase.PHASE_BUFFER;

import java.util.Collection;
import java.util.Set;
import java.util.function.Consumer;

import com.google.common.collect.Sets;
import com.hartwig.hmftools.common.variant.hotspot.VariantHotspot;
import com.hartwig.hmftools.sage.read.ReadContext;
import com.hartwig.hmftools.sage.variant.SageVariant;
Expand All @@ -14,11 +16,27 @@
class LocalPhaseSet extends BufferedPostProcessor {

private int phase;
private final Set<Integer> passingPhaseSets = Sets.newHashSet();

/**
 * Creates the post-processor by forwarding {@code PHASE_BUFFER} and the supplied consumer
 * to the superclass constructor.
 *
 * @param consumer consumer of {@link SageVariant} instances, handed to the superclass unchanged
 */
LocalPhaseSet(@NotNull final Consumer<SageVariant> consumer) {
super(PHASE_BUFFER, consumer);
}

/**
 * Exposes the local phase set ids collected so far from passing variants.
 *
 * @return the internal (live, mutable) set that {@code preFlush} adds to; no copy is made
 */
@NotNull
public Set<Integer> passingPhaseSets() {
    return this.passingPhaseSets;
}

/**
 * Runs the superclass pre-flush step, then records the local phase set of every variant
 * that is passing and has a phase set id greater than zero.
 *
 * @param variants variants about to be flushed
 */
@Override
protected void preFlush(@NotNull final Collection<SageVariant> variants) {
    super.preFlush(variants);

    for (final SageVariant candidate : variants) {
        if (!candidate.isPassing()) {
            // Only passing variants contribute a phase set.
            continue;
        }

        final int phaseSetId = candidate.localPhaseSet();
        if (phaseSetId > 0) {
            passingPhaseSets.add(phaseSetId);
        }
    }
}

@Override
protected void processSageVariant(@NotNull final SageVariant newEntry, @NotNull final Collection<SageVariant> buffer) {
final ReadContext newReadContext = newEntry.readContext();
Expand Down
Loading

0 comments on commit cfbed3d

Please sign in to comment.