Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FastqToSam stdin fix for #915 #1910

Merged
merged 5 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions src/main/java/picard/nio/PicardHtsPath.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,21 @@

package picard.nio;

import htsjdk.io.HtsPath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;

/**
* A Subclass of {@link HtsPath} with conversion to {@link Path} making use of {@link IOUtil}
*/
Expand Down Expand Up @@ -108,6 +111,24 @@ public static PicardHtsPath fromPath(final Path path){
return new PicardHtsPath(new HtsPath(path.toUri().toString()));
}

/**
* Test if {@code ioPath} is something other than a regular file, directory, or symbolic link.
*
* @return {@code true} if it's a device, named pipe, htsget API URL, etc.
* @throws RuntimeException if an I/O error occurs when creating the file system
*/
public static boolean isOther(final IOPath ioPath) {
if(ioPath.isPath()) {
try {
return Files.readAttributes(ioPath.toPath(), BasicFileAttributes.class).isOther();
} catch (IOException e) {
throw new RuntimeIOException(e);
}
} else {
return true;
}
}

/**
* Create a {@link List<Path>} from {@link PicardHtsPath}s
* @param picardHtsPaths may NOT be null
Expand Down
63 changes: 37 additions & 26 deletions src/main/java/picard/sam/FastqToSam.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@
*/
package picard.sam;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;

import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
Expand All @@ -44,24 +55,12 @@
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.SolexaQualityConverter;
import htsjdk.samtools.util.StringUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
* Converts a FASTQ file to an unaligned BAM or SAM file.
* <p>
Expand All @@ -78,11 +77,11 @@
* These files might be in gzip compressed format (when file name is ending with ".gz").
* </p>
* <p>
* Alternatively, for larger inputs you can provide a collection of FASTQ files indexed by their name (see <code>USE_SEQUENCIAL_FASTQ</code> for details below).
* Alternatively, for larger inputs you can provide a collection of FASTQ files indexed by their name (see <code>USE_SEQUENTIAL_FASTQ</code> for details below).
* </p>
* <p>
* By default, this tool will try to guess the base quality score encoding. However you can indicate it explicitly
* using the <code>QUALITY_FORMAT</code> argument.
* using the <code>QUALITY_FORMAT</code> argument, and must do so if inputs are not a regular file (e.g. stdin).
* </p>
* <h3>Output</h3>
* A single unaligned BAM or SAM file. By default, the records are sorted by query (read) name.
Expand Down Expand Up @@ -130,9 +129,9 @@ public class FastqToSam extends CommandLineProgram {
"<p>One FASTQ file name for single-end or two for pair-end sequencing input data. " +
"These files might be in gzip compressed format (when file name is ending with \".gz\").</p>" +
"<p>Alternatively, for larger inputs you can provide a collection of FASTQ files indexed by their name " +
"(see USE_SEQUENCIAL_FASTQ for details below).</p>" +
"(see USE_SEQUENTIAL_FASTQ for details below).</p>" +
"<p>By default, this tool will try to guess the base quality score encoding. However you can indicate it explicitly " +
"using the QUALITY_FORMAT argument.</p>" +
"using the QUALITY_FORMAT argument, and must do so if inputs are not a regular file (e.g. stdin).</p>" +
"<h3>Output</h3>" +
"<p>A single unaligned BAM or SAM file. By default, the records are sorted by query (read) name.</p>" +
"<h3>Usage examples</h3>" +
Expand Down Expand Up @@ -176,7 +175,8 @@ public class FastqToSam extends CommandLineProgram {

@Argument(shortName="V", doc="A value describing how the quality values are encoded in the input FASTQ file. " +
"Either Solexa (phred scaling + 66), Illumina (phred scaling + 64) or Standard (phred scaling + 33). " +
"If this value is not specified, the quality format will be detected automatically.", optional = true)
"If input is from a regular file and this value is not specified, the quality format will be detected automatically. " +
"If input is not from a regular file, this value is required.", optional = true)
public FastqQualityFormat QUALITY_FORMAT;

@Argument(doc="Output BAM/SAM/CRAM file. ", shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME)
Expand Down Expand Up @@ -239,6 +239,9 @@ public class FastqToSam extends CommandLineProgram {

private static final SolexaQualityConverter solexaQualityConverter = SolexaQualityConverter.getSingleton();

// tested for and set in customCommandLineValidation
private Boolean regularFileInput;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment explaining that this is tested/set in customCommandlineValidation. That's exactly what I asked you to do but it's a bit of an unexpected place to do initialization so it might be worth mentioning.


/**
* Looks at fastq input(s) and attempts to determine the proper quality format
*
Expand Down Expand Up @@ -329,10 +332,12 @@ protected int doWork() {

final FastqReader reader1 = fileToFastqReader(FASTQ.toPath());
if (reader1.hasNext()) {
// Set the quality format
QUALITY_FORMAT = FastqToSam.determineQualityFormat(reader1,
(FASTQ2 == null) ? null : fileToFastqReader(FASTQ2.toPath()),
QUALITY_FORMAT);
if (regularFileInput) {
// Set the quality format
QUALITY_FORMAT = FastqToSam.determineQualityFormat(reader1,
(FASTQ2 == null) ? null : fileToFastqReader(FASTQ2.toPath()),
delocalizer marked this conversation as resolved.
Show resolved Hide resolved
QUALITY_FORMAT);
}
} else {
if (ALLOW_EMPTY_FASTQ) {
LOG.warn("Input FASTQ is empty, will write out SAM/BAM file with no reads.");
Expand Down Expand Up @@ -360,7 +365,8 @@ protected int doWork() {
}
}
else {
readers1.add(fileToFastqReader(FASTQ.toPath()));
// use the already opened reader1 if input is STDIN or a named pipe
readers1.add(regularFileInput ? fileToFastqReader(FASTQ.toPath()) : reader1);
if (FASTQ2 != null) {
readers2.add(fileToFastqReader(FASTQ2.toPath()));
}
Expand Down Expand Up @@ -405,7 +411,7 @@ protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer)
final ProgressLogger progress = new ProgressLogger(LOG);
for ( ; freader.hasNext() ; readCount++) {
final FastqRecord frec = freader.next();
final SAMRecord srec = createSamRecord(writer.getFileHeader(), SequenceUtil.getSamReadNameFromFastqHeader(frec.getReadHeader()) , frec, false) ;
final SAMRecord srec = createSamRecord(writer.getFileHeader(), SequenceUtil.getSamReadNameFromFastqHeader(frec.getReadName()) , frec, false) ;
srec.setReadPairedFlag(false);
writer.addAlignment(srec);
progress.record(srec);
Expand All @@ -422,8 +428,8 @@ protected int doPaired(final FastqReader freader1, final FastqReader freader2, f
final FastqRecord frec1 = freader1.next();
final FastqRecord frec2 = freader2.next();

final String frec1Name = SequenceUtil.getSamReadNameFromFastqHeader(frec1.getReadHeader());
final String frec2Name = SequenceUtil.getSamReadNameFromFastqHeader(frec2.getReadHeader());
final String frec1Name = SequenceUtil.getSamReadNameFromFastqHeader(frec1.getReadName());
final String frec2Name = SequenceUtil.getSamReadNameFromFastqHeader(frec2.getReadName());
final String baseName = getBaseName(frec1Name, frec2Name, freader1, freader2);

final SAMRecord srec1 = createSamRecord(writer.getFileHeader(), baseName, frec1, true) ;
Expand Down Expand Up @@ -462,7 +468,7 @@ private SAMRecord createSamRecord(final SAMFileHeader header, final String baseN
final int uQual = qual & 0xff;
if (uQual < MIN_Q || uQual > MAX_Q) {
throw new PicardException("Base quality " + uQual + " is not in the range " + MIN_Q + ".." +
MAX_Q + " for read " + frec.getReadHeader());
MAX_Q + " for read " + frec.getReadName());
}
}
srec.setBaseQualities(quals);
Expand Down Expand Up @@ -582,6 +588,11 @@ private String error(final FastqReader freader, final String str) {
protected String[] customCommandLineValidation() {
if (MIN_Q < 0) return new String[]{"MIN_Q must be >= 0"};
if (MAX_Q > SAMUtils.MAX_PHRED_SCORE) return new String[]{"MAX_Q must be <= " + SAMUtils.MAX_PHRED_SCORE};
regularFileInput = !PicardHtsPath.isOther(FASTQ) && (FASTQ2 == null || !PicardHtsPath.isOther(FASTQ2));
if (QUALITY_FORMAT == null && !regularFileInput) {
return new String[]{"QUALITY_FORMAT must be specified when either of FASTQ or FASTQ2 is not a regular file"};
}
return null;
}

}
62 changes: 62 additions & 0 deletions src/test/java/picard/sam/FastqToSamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@
import picard.cmdline.CommandLineProgramTest;
import picard.PicardException;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
Expand All @@ -47,6 +49,8 @@
*/
public class FastqToSamTest extends CommandLineProgramTest {
private static final File TEST_DATA_DIR = new File("testdata/picard/sam/fastq2bam");
private static final String CLASSPATH = "\"" + System.getProperty("java.class.path") + "\" ";


public String getCommandLineProgramName() {
return FastqToSam.class.getSimpleName();
Expand Down Expand Up @@ -177,6 +181,64 @@ public void testEmptyFastqAllowed() throws IOException {
convertFile(emptyFastq, null, FastqQualityFormat.Illumina, false, false, true);
}

@Test
public void testStreamInputWithoutQuality() throws IOException {
ByteArrayOutputStream stderrStream = new ByteArrayOutputStream();
PrintStream newStderr = new PrintStream(stderrStream);
PrintStream oldStderr = System.err;
try {
System.setErr(newStderr);
final File tmpFile = File.createTempFile("empty", ".sam");
final String[] args = {
"FASTQ=/dev/stdin",
"SAMPLE_NAME=sample001",
"OUTPUT=" + tmpFile
};
Assert.assertEquals(runPicardCommandLine(args), 1);
} finally {
System.setErr(oldStderr);
}
Assert.assertTrue(stderrStream.toString().endsWith("QUALITY_FORMAT must be specified when either of FASTQ or FASTQ2 is not a regular file\n"));
}

@Test
public void testStreamInput() throws IOException {
delocalizer marked this conversation as resolved.
Show resolved Hide resolved
final File output = newTempSamFile("stdin");
String fastq = """
@ERR194147.10008417/1
ATTTAATTAAGAAAATGTAAACTAAATGACAGTAGACAGACAAGTATGCCTTTGC
+
???????????????????????????????????????????????????????
""";
String[] command = {
"/bin/bash",
"-c",
"echo -n '" + fastq + "'|" +
"java -classpath " +
CLASSPATH +
"picard.cmdline.PicardCommandLine " +
"FastqToSam " +
"-FASTQ /dev/stdin " +
"-QUALITY_FORMAT Standard " +
"-SAMPLE_NAME sample001 " +
"-OUTPUT " + output
};

try {
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.inheritIO();
Process process = processBuilder.start();
Assert.assertEquals(process.waitFor(), 0);
} catch (Exception e) {
Assert.fail("Failed to pipe data from stdin to FastqToSam", e);
}

try(final SamReader samReader = SamReaderFactory.makeDefault().open(output)) {
long actualCount = samReader.iterator().stream().count();
Assert.assertEquals(actualCount, 1);
}
}

private File convertFile(final String filename, final FastqQualityFormat version) throws IOException {
return convertFile(filename, null, version);
}
Expand Down