diff --git a/.gitignore b/.gitignore
index 43091aa..020ed03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
__pycache__/
*.py[cod]
*$py.class
+*.DS_Store
+*.pk
# C extensions
*.so
diff --git a/HISTORY.rst b/HISTORY.rst
index 240dacc..3f74c57 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,7 +2,7 @@
History
=======
-0.1.0 (2019-12-06)
+0.1.1 (2021-01-07)
------------------
* First release on PyPI.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8c9e616
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# biometrics
+
+Package to generate sample-based biometrics.
+
+[![Build Status](https://travis-ci.com/msk-access/biometrics.svg?branch=master)](https://travis-ci.com/msk-access/biometrics) [![PyPi](https://img.shields.io/pypi/v/biometrics.svg?)](https://pypi.python.org/pypi/biometrics)
+
+* Free software: Apache Software License 2.0
+* Documentation: https://msk-access.gitbook.io/biometrics/
+
+## Installation
+
+From PyPI:
+
+`pip install biometrics`
diff --git a/README.rst b/README.rst
deleted file mode 100644
index 94ec0fa..0000000
--- a/README.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-==========
-biometrics
-==========
-
-
-.. image:: https://img.shields.io/pypi/v/biometrics.svg
- :target: https://pypi.python.org/pypi/biometrics
-
-.. image:: https://img.shields.io/travis/rhshah/biometrics.svg
- :target: https://travis-ci.org/rhshah/biometrics
-
-.. image:: https://readthedocs.org/projects/biometrics/badge/?version=latest
- :target: https://biometrics.readthedocs.io/en/latest/?badge=latest
- :alt: Documentation Status
-
-
-
-
-Package to generate sample based biometrics
-
-
-* Free software: Apache Software License 2.0
-* Documentation: https://biometrics.readthedocs.io.
-
-
-Features
---------
-
-* TODO
-
-Credits
--------
-
-This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
-
-.. _Cookiecutter: https://github.com/audreyr/cookiecutter
-.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
diff --git a/biometrics/__init__.py b/biometrics/__init__.py
index 581186c..d5ad913 100644
--- a/biometrics/__init__.py
+++ b/biometrics/__init__.py
@@ -2,4 +2,4 @@
__author__ = """Ronak Shah"""
__email__ = 'rons.shah@gmail.com'
-__version__ = '0.1.0'
+__version__ = '0.1.1'
diff --git a/biometrics/biometrics.py b/biometrics/biometrics.py
old mode 100644
new mode 100755
index dd0b80e..dafa4c7
--- a/biometrics/biometrics.py
+++ b/biometrics/biometrics.py
@@ -1 +1,311 @@
-"""Main module."""
+import os
+import glob
+
+import pandas as pd
+
+from biometrics.sample import Sample
+from biometrics.extract import Extract
+from biometrics.genotype import Genotyper
+from biometrics.minor_contamination import MinorContamination
+from biometrics.major_contamination import MajorContamination
+from biometrics.sex_mismatch import SexMismatch
+from biometrics.utils import standardize_sex_nomenclature, exit_error
+
+
+def write_to_file(args, data, basename):
+    """
+    Save a results table into the output directory.
+
+    ``<basename>.csv`` is always written (without the index) under
+    ``args.outdir``. When ``args.json`` is set, ``<basename>.json`` is
+    written next to it as well.
+    """
+
+    target_stem = os.path.join(os.path.abspath(args.outdir), basename)
+
+    data.to_csv(target_stem + '.csv', index=False)
+
+    if not args.json:
+        return
+
+    data.to_json(target_stem + '.json')
+
+
+def run_extract(args, samples):
+    """
+    Run the extraction step on the given samples.
+
+    Builds an Extract instance from the CLI arguments and returns
+    whatever its extract() call returns for ``samples``.
+    """
+
+    return Extract(args=args).extract(samples)
+
+
+def run_sexmismatch(args, samples):
+    """
+    Detect sex mismatches among the samples and save the result as
+    'sex_mismatch' in the output directory.
+    """
+
+    detector = SexMismatch(args.coverage_threshold)
+
+    write_to_file(args, detector.detect_mismatch(samples), 'sex_mismatch')
+
+
+def run_minor_contamination(args, samples):
+    """
+    Estimate minor contamination, save the table as
+    'minor_contamination' and optionally plot it.
+
+    Plotting only happens when ``args.plot`` is set and there are at
+    most 1000 samples; above that a warning is printed instead.
+    Returns the samples as returned by the estimator.
+    """
+
+    estimator = MinorContamination(threshold=args.minor_threshold)
+    samples = estimator.estimate(samples)
+
+    table = estimator.to_dataframe(samples)
+    write_to_file(args, table, 'minor_contamination')
+
+    if args.plot and len(samples) > 1000:
+        print('WARNING - Turning off plotting functionality. You are trying to plot more than 1000 samples, which is too cumbersome.')
+    elif args.plot:
+        estimator.plot(table, args.outdir)
+
+    return samples
+
+
+def run_major_contamination(args, samples):
+    """
+    Estimate major contamination, save the table as
+    'major_contamination' and optionally plot it.
+
+    Plotting only happens when ``args.plot`` is set and there are at
+    most 1000 samples; above that a warning is printed instead.
+    Returns the samples as returned by the estimator.
+    """
+
+    estimator = MajorContamination(threshold=args.major_threshold)
+    samples = estimator.estimate(samples)
+
+    table = estimator.to_dataframe(samples)
+    write_to_file(args, table, 'major_contamination')
+
+    if args.plot and len(samples) > 1000:
+        print('WARNING - Turning off plotting functionality. You are trying to plot more than 1000 samples, which is too cumbersome.')
+    elif args.plot:
+        estimator.plot(table, args.outdir)
+
+    return samples
+
+
+def run_genotyping(args, samples):
+    """
+    Compare sample genotypes, save the table as 'genotype_comparison'
+    and optionally plot it.
+
+    Plotting only happens when ``args.plot`` is set and there are at
+    most 1000 samples; above that a warning is printed instead.
+    Returns the ``samples`` mapping it was given.
+    """
+
+    genotyper = Genotyper(
+        no_db_compare=args.no_db_compare,
+        discordance_threshold=args.discordance_threshold,
+        threads=args.threads,
+        zmin=args.zmin,
+        zmax=args.zmax)
+
+    comparison = genotyper.genotype(samples)
+    write_to_file(args, comparison, 'genotype_comparison')
+
+    if args.plot and len(samples) > 1000:
+        print('WARNING - Turning off plotting functionality. You are trying to plot more than 1000 samples, which is too cumbersome.')
+    elif args.plot:
+        genotyper.plot(comparison, args.outdir)
+
+    return samples
+
+
+def load_input_sample_from_db(sample_name, database):
+    """
+    Load one sample that the user named on the CLI from the database.
+
+    The sample is read from '<database>/<sample_name>.pk'. If that file
+    is missing, the problem is reported through exit_error.
+    """
+
+    pickle_path = os.path.join(database, sample_name + '.pk')
+
+    if not os.path.exists(pickle_path):
+        message = 'Could not find: {}. Please rerun the extraction step.'
+        exit_error(message.format(pickle_path))
+
+    loaded = Sample(query_group=False)
+    loaded.load_from_file(pickle_path)
+
+    return loaded
+
+
+def load_database_samples(database, existing_samples):
+    """
+    Load the samples stored in the database that were NOT given as
+    input via the CLI.
+
+    Every '<name>.pk' file in ``database`` whose ``<name>`` is not in
+    ``existing_samples`` is loaded as a Sample flagged with
+    ``query_group=True``. Returns a dict keyed by the loaded sample's
+    ``sample_name``.
+    """
+
+    suffix = '.pk'
+    samples = {}
+
+    # match on the real extension only; a bare '*pk' would also pick up
+    # unrelated files whose name merely ends in "pk"
+    for pickle_file in glob.glob(os.path.join(database, '*' + suffix)):
+
+        # strip just the trailing extension, so a sample name that
+        # itself contains '.pk' is kept intact
+        sample_name = os.path.basename(pickle_file)[:-len(suffix)]
+
+        if sample_name in existing_samples:
+            continue
+
+        sample = Sample(db=database, query_group=True)
+        sample.load_from_file(extraction_file=pickle_file)
+
+        samples[sample.sample_name] = sample
+
+    return samples
+
+
+def get_samples_from_input(input, database, extraction_mode):
+    """
+    Parse the sample information from the user-supplied CSV file(s).
+
+    ``input`` is an iterable of CSV paths. Each file must have the
+    'sample_name' and 'sample_bam' columns; a missing column is
+    reported through exit_error.
+
+    When ``extraction_mode`` is false, only the sample name is used and
+    each sample is loaded from the database. Otherwise a new Sample is
+    built from the row; 'sample_group', 'sample_type' and 'sample_sex'
+    are optional columns.
+
+    Returns a dict keyed by sample name.
+    """
+
+    samples = {}
+
+    for fpath in input:
+
+        # keep the table under its own name: rebinding ``input`` while
+        # iterating over it is what hid the row/table mix-up below
+        table = pd.read_csv(fpath, sep=',')
+
+        # check if some required columns are present
+
+        if 'sample_bam' not in table.columns:
+            exit_error(
+                'Input file does not have the \'sample_bam\' column.')
+
+        if 'sample_name' not in table.columns:
+            exit_error('Input does not have \'sample_name\' column.')
+
+        for row in table.to_dict(orient='records'):
+
+            if not extraction_mode:
+                # if not running extract tool, then just need to get
+                # the sample name
+                sample = load_input_sample_from_db(
+                    row['sample_name'], database)
+            else:
+                # the sex must come from this row; the previous code
+                # called .get() on the list of all records
+                sample = Sample(
+                    sample_name=row['sample_name'],
+                    sample_bam=row['sample_bam'],
+                    sample_group=row.get('sample_group'),
+                    sample_type=row.get('sample_type'),
+                    sample_sex=standardize_sex_nomenclature(
+                        row.get('sample_sex')),
+                    db=database)
+
+            samples[sample.sample_name] = sample
+
+    return samples
+
+
+def get_samples_from_bam(args):
+    """
+    Build one Sample per BAM file given via --sample-bam.
+
+    The optional per-sample lists (sex, name, group, type) are read at
+    the same position as the BAM file; a list that was not supplied
+    contributes None. Returns a dict keyed by sample name.
+    """
+
+    def nth_or_none(values, index):
+        return None if values is None else values[index]
+
+    samples = {}
+
+    for position, bam_path in enumerate(args.sample_bam):
+
+        sex = standardize_sex_nomenclature(
+            nth_or_none(args.sample_sex, position))
+        name = nth_or_none(args.sample_name, position)
+        group = nth_or_none(args.sample_group, position)
+        kind = nth_or_none(args.sample_type, position)
+
+        sample = Sample(
+            sample_bam=bam_path, sample_group=group,
+            sample_name=name, sample_type=kind,
+            sample_sex=sex, db=args.database)
+
+        samples[sample.sample_name] = sample
+
+    return samples
+
+
+def get_samples_from_name(sample_names, database):
+    """
+    Load the samples named on the CLI from the database.
+
+    Returns a dict keyed by the loaded sample's ``sample_name``.
+    """
+
+    samples = {}
+
+    for requested_name in sample_names:
+        loaded = load_input_sample_from_db(requested_name, database)
+        samples[loaded.sample_name] = loaded
+
+    return samples
+
+
+def get_samples(args, extraction_mode=False):
+    """
+    Collect every sample the requested tool should work on.
+
+    Samples listed in the --input file(s) are always included. In
+    extraction mode the BAM files given on the CLI are added. Otherwise
+    the samples named via --sample-name are added, each collected
+    sample is (re)loaded from its '<database>/<name>.pk' file and,
+    unless --no-db-compare is set, the remaining samples of the
+    database are added too.
+
+    Returns a dict keyed by sample name.
+    """
+
+    samples = {}
+
+    if args.input:
+        samples.update(get_samples_from_input(
+            args.input, args.database, extraction_mode))
+
+    if extraction_mode:
+        if args.sample_bam:
+            samples.update(get_samples_from_bam(args))
+        return samples
+
+    if args.sample_name:
+        samples.update(get_samples_from_name(
+            args.sample_name, args.database))
+
+    for sample_name, sample in samples.items():
+        sample.load_from_file(
+            os.path.join(args.database, sample_name + '.pk'))
+
+    if not args.no_db_compare:
+        # the names collected so far are excluded from the database scan
+        samples.update(load_database_samples(
+            args.database, set(samples)))
+
+    return samples
+
+
+def create_outdir(outdir):
+ """
+ Create the directory ``outdir``, including missing parent
+ directories. An already existing directory is not an error.
+ """
+ os.makedirs(outdir, exist_ok=True)
+
+
+def run_biometrics(args):
+    """
+    Run the tool selected by the CLI sub-command.
+
+    Samples are gathered first. The 'extract' sub-command then creates
+    the database directory and runs the extraction; every other known
+    sub-command creates the output directory and runs its tool. An
+    unknown sub-command does nothing after the samples are gathered.
+    """
+
+    tool = args.subparser_name
+    extraction_mode = tool == 'extract'
+
+    samples = get_samples(args, extraction_mode=extraction_mode)
+
+    if extraction_mode:
+        create_outdir(args.database)
+        run_extract(args, samples)
+        return
+
+    runners = {
+        'sexmismatch': run_sexmismatch,
+        'minor': run_minor_contamination,
+        'major': run_major_contamination,
+        'genotype': run_genotyping,
+    }
+
+    runner = runners.get(tool)
+
+    if runner is not None:
+        create_outdir(args.outdir)
+        runner(args, samples)
diff --git a/biometrics/cli.py b/biometrics/cli.py
index 24caa03..0924fc7 100644
--- a/biometrics/cli.py
+++ b/biometrics/cli.py
@@ -1,16 +1,216 @@
+#!/usr/bin/env python
"""Console script for biometrics."""
+
import sys
-import click
+import argparse
+
+from utils import exit_error
+from biometrics.biometrics import run_biometrics
+
+
+def add_extraction_args(parser):
+ """
+ Register the command line options of the 'extract' sub-command on
+ ``parser`` and return that same parser.
+ """
+
+ # sample description: either through input file(s) (-i may be repeated)
+ # or through the per-sample lists below
+ parser.add_argument(
+ '-i', '--input', action="append", required=False,
+ help='''Path to file containing sample information (one per line).
+ For example: sample_name,sample_bam,sample_type,sample_sex,sample_group''')
+ parser.add_argument(
+ '-sb', '--sample-bam', nargs="+", required=False,
+ help='''Space-delimited list of BAM files.''')
+ parser.add_argument(
+ '-st', '--sample-type', nargs="+", required=False,
+ help='''Space-delimited list of sample types: Normal or Tumor.
+ Must be in the same order as --sample-bam.''')
+ parser.add_argument(
+ '-ss', '--sample-sex', nargs="+", required=False,
+ help='''Space-delimited list of sample sex (i.e. M or F). Must be
+ in the same order as --sample-bam.''')
+ parser.add_argument(
+ '-sg', '--sample-group', nargs="+", required=False,
+ help='''Space-delimited list of sample group information
+ (e.g. sample patient ID). Must be in the same order as --sample-bam.''')
+ parser.add_argument(
+ '-sn', '--sample-name', nargs="+", required=False,
+ help='''Space-delimited list of sample names. If not specified,
+ sample name is automatically figured out from the BAM file. Must
+ be in the same order as --sample-bam.''')
+ # what to query: sites from a VCF and/or intervals from a BED file
+ parser.add_argument(
+ '--vcf', required=False,
+ help='''VCF file containing the sites to be queried.''')
+ parser.add_argument(
+ '--bed', required=False,
+ help='''BED file containing the intervals to be queried.''')
+ parser.add_argument(
+ '-db', '--database', required=True,
+ help='''Directory to store the intermediate files after
+ running the extraction step.''')
+ parser.add_argument(
+ '-ov', '--overwrite', action='store_true',
+ help='''Overwrite any existing extraction results.''')
+ parser.add_argument(
+ '-f', '--fafile', required=True,
+ help='''Path to reference fasta file.''')
+ # read and coverage filters used during the pileup
+ parser.add_argument(
+ '-q', '--min-mapping-quality', default=1, type=int,
+ help='''Minimum mapping quality of reads to be used for pileup.''')
+ parser.add_argument(
+ '-Q', '--min-base-quality', default=1, type=int,
+ help='''Minimum base quality of reads to be used for pileup.''')
+ parser.add_argument(
+ '-mc', '--min-coverage', default=10, type=int,
+ help='''Minimum coverage to count a site.''')
+ parser.add_argument(
+ '--default-genotype', default=None,
+ help='''Default genotype if coverage is too low (options are Het or Hom).''')
+ parser.add_argument(
+ '-t', '--threads', default=1, type=int,
+ help='''Number of threads to use to extract the samples.''')
+
+ return parser
+
+
+def add_common_tool_args(parser):
+ """
+ Register the options shared by the analysis sub-commands
+ (sexmismatch, minor, major, genotype) on ``parser`` and return that
+ same parser.
+ """
+ parser.add_argument(
+ '-sn', '--sample-name', nargs="+", required=False,
+ help='''Space-delimited list of sample names to analyze.
+ Assumes the samples have already been extracted.''')
+ parser.add_argument(
+ '-i', '--input', action="append", required=False,
+ help='''Path to file containing sample information (one per line).
+ For example: sample_name,sample_bam,sample_type,sample_sex,sample_group.''')
+ parser.add_argument(
+ '-db', '--database', required=True,
+ help='''Directory to store the intermediate files after
+ running the extraction step.''')
+ parser.add_argument(
+ '-o', '--outdir', default='.',
+ help='''Output directory for results.''')
+ parser.add_argument(
+ '-j', '--json', action='store_true',
+ help='''Also output data in JSON format.''')
+ parser.add_argument(
+ '-nc', '--no-db-compare', action='store_true',
+ help='''Do not compare the sample(s) you provided to all samples
+ in the database, only compare them with each other.''')
+
+ return parser
+
+
+def check_arg_equal_len(vals1, vals2, name):
+    """
+    Report an error when ``vals2`` was supplied but has a different
+    number of items than ``vals1``.
+
+    ``name`` is the option name used at the start of the error message.
+    A ``vals2`` of None (option not given) is always accepted.
+    """
+
+    if vals2 is None or len(vals1) == len(vals2):
+        return
+
+    message = '{} does not have the same number of items as --sample-bam'
+    exit_error(message.format(name))
+
+
+def check_args(args):
+    """
+    Validate argument combinations that argparse cannot express.
+
+    * a sub-command must have been chosen;
+    * the analysis tools need --input or --sample-name;
+    * the extraction tool needs --input or --sample-bam;
+    * for the extraction tool, every per-sample list that was supplied
+      must have as many items as --sample-bam.
+
+    Problems are reported through exit_error.
+    """
+
+    if args.subparser_name is None:
+        # without a sub-command none of the per-tool options exist on
+        # ``args``, so stop before touching them
+        exit_error('You must specify a sub-command.')
+        return
+
+    if args.subparser_name != 'extract':
+        if not args.input and not args.sample_name:
+            exit_error('You must specify either --input or --sample-name')
+
+        # the per-sample lists below are only defined by the extract
+        # parser; reading them here would raise AttributeError
+        return
+
+    if not args.input and not args.sample_bam:
+        exit_error(
+            'The extraction tool requires that you specify either ' +
+            '--input or --sample-bam')
+
+    if args.sample_bam:
+        # --sample-bam is the reference list: the help texts require the
+        # other lists to follow its order, and the error message names it
+        for option in (
+                'sample_name', 'sample_type', 'sample_group', 'sample_sex'):
+            check_arg_equal_len(
+                args.sample_bam, getattr(args, option),
+                '--' + option.replace('_', '-'))
+
+
+def get_args():
+ """
+ Build the command line parser with its sub-commands (extract,
+ sexmismatch, minor, major, genotype), parse the command line,
+ validate the result with check_args and return the parsed
+ arguments.
+
+ The chosen sub-command is stored in ``args.subparser_name``.
+ """
+
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description='''Various tools for fingerprinting samples from BAM files.
+ Sample information to each sub command is supplied via input file(s)
+ and/or as individual samples.''')
+ subparsers = parser.add_subparsers(help='', dest="subparser_name")
+
+ # extract parser
+
+ parser_extract = subparsers.add_parser(
+ 'extract',
+ help='''Intermediate step to extract genotype info from one or more
+ samples. The output from this step is required for the rest of the
+ fingerprinting tools. However, you do not need to run this step
+ manually since it will run automatically if the necessary files
+ are missing.''')
+ parser_extract = add_extraction_args(parser_extract)
+
+ # sex mismatch parser
+
+ parser_sexmismatch = subparsers.add_parser(
+ 'sexmismatch', help='Check for sex mismatches.')
+ parser_sexmismatch = add_common_tool_args(parser_sexmismatch)
+ parser_sexmismatch.add_argument(
+ '--coverage-threshold', default=50, type=int,
+ help='''Samples with Y chromosome above this value will be considered male.''')
+
+ # minor contamination parser
+
+ parser_minor = subparsers.add_parser(
+ 'minor', help='Check for minor contamination.')
+ parser_minor = add_common_tool_args(parser_minor)
+ parser_minor.add_argument(
+ '-p', '--plot', action='store_true',
+ help='''Also output plots of the data.''')
+ parser_minor.add_argument(
+ '--minor-threshold', default=0.002, type=float,
+ help='''Minor contamination threshold for bad sample.''')
+
+ # major contamination parser
+
+ parser_major = subparsers.add_parser(
+ 'major', help='Check for major contamination.')
+ parser_major = add_common_tool_args(parser_major)
+ parser_major.add_argument(
+ '-p', '--plot', action='store_true',
+ help='''Also output plots of the data.''')
+ parser_major.add_argument(
+ '--major-threshold', default=0.6, type=float,
+ help='''Major contamination threshold for bad sample.''')
+
+ # genotyping parser
+
+ parser_genotype = subparsers.add_parser(
+ 'genotype', help='Genotype a set of samples.')
+ parser_genotype = add_common_tool_args(parser_genotype)
+ parser_genotype.add_argument(
+ '-p', '--plot', action='store_true',
+ help='''Also output plots of the data.''')
+ parser_genotype.add_argument(
+ '--discordance-threshold', default=0.05, type=float,
+ help='''Discordance values less than this are regarded
+ as matching samples.''')
+ parser_genotype.add_argument(
+ '-t', '--threads', default=1, type=int,
+ help='''Number of threads to use to extract the samples.''')
+ parser_genotype.add_argument(
+ '--zmin', type=float,
+ help='''Minimum z value for the colorscale on the heatmap.''')
+ parser_genotype.add_argument(
+ '--zmax', type=float,
+ help='''Maximum z value for the colorscale on the heatmap.''')
+
+ # no explicit argument list: argparse reads the process command line
+ args = parser.parse_args()
+
+ check_args(args)
+
+ return args
+
+
+def main():
+ """Parse the command line and run the selected biometrics tool."""
+ args = get_args()
-@click.command()
-def main(args=None):
- """Console script for biometrics."""
- click.echo("Replace this message by putting your code into "
- "biometrics.cli.main")
- click.echo("See click documentation at https://click.palletsprojects.com/")
- return 0
+ run_biometrics(args)
if __name__ == "__main__":
- sys.exit(main()) # pragma: no cover
+ sys.exit(main())
diff --git a/biometrics/extract.py b/biometrics/extract.py
new file mode 100644
index 0000000..aaa9d52
--- /dev/null
+++ b/biometrics/extract.py
@@ -0,0 +1,333 @@
+import os
+from multiprocessing import Pool
+
+import pandas as pd
+import numpy as np
+import vcf
+from pysam import AlignmentFile
+
+
+HETEROZYGOUS_THRESHOLD = 0.1
+
+
+class Extract:
+ """
+ Class for extracting genotype information from alignment file using
+ the user supplied VCF file.
+ """
+
+    def __init__(self, args):
+        """
+        Copy the extraction settings from the parsed CLI arguments and
+        read the VCF sites and BED regions (when given).
+        """
+
+        # where results live and how the work is run
+        self.db = args.database
+        self.threads = args.threads
+        self.overwrite = args.overwrite
+
+        # input files
+        self.vcf = args.vcf
+        self.bed = args.bed
+        self.fafile = args.fafile
+
+        # pileup filters and genotype fallback
+        self.min_mapping_quality = args.min_mapping_quality
+        self.min_base_quality = args.min_base_quality
+        self.min_coverage = args.min_coverage
+        self.default_genotype = args.default_genotype
+
+        # filled in by the two parsers below
+        self.sites = []
+        self.regions = None
+
+        self._parse_vcf()
+        self._parse_bed_file()
+
+    def _parse_vcf(self):
+        """
+        Read the sites to query from the VCF file into ``self.sites``.
+
+        Each record becomes a dict with the chromosome, a 0-based start
+        (POS - 1), an end (POS), the reference allele and the first
+        alternate allele. Does nothing when no VCF was given.
+        """
+
+        if self.vcf is None:
+            return
+
+        # the handle is closed as soon as all records have been read
+        with open(self.vcf, 'r') as handle:
+            for record in vcf.Reader(handle):
+                self.sites.append({
+                    'chrom': record.CHROM,
+                    'start': record.POS-1,
+                    'end': record.POS,
+                    'ref_allele': str(record.REF),
+                    'alt_allele': str(record.ALT[0])
+                })
+
+    def _parse_bed_file(self):
+        """
+        Read the BED file into ``self.regions`` as a header-less,
+        tab-separated table whose columns are numbered 0..n-1.
+        Does nothing when no BED file was given.
+        """
+
+        if self.bed is None:
+            return
+
+        regions = pd.read_csv(self.bed, sep='\t', header=None)
+        regions.columns = range(regions.shape[1])
+
+        self.regions = regions
+
+    def _extract_regions(self, sample):
+        """
+        Count the reads of the sample's BAM file in every region listed
+        in the BED file.
+
+        The counts are stored on ``sample.region_counts`` as a table
+        with the columns chrom, start, end and count. The sample is
+        returned unchanged when no BED regions were loaded.
+        """
+
+        if self.regions is None:
+            return sample
+
+        region_counts = []
+
+        # open the BAM file as a context manager so that the handle is
+        # released even if counting fails part-way through
+        with AlignmentFile(sample.sample_bam) as bam:
+
+            for i in self.regions.index:
+
+                chrom = self.regions.at[i, 0]
+                start = int(self.regions.at[i, 1])
+                end = int(self.regions.at[i, 2])
+
+                region_counts.append({
+                    'chrom': chrom,
+                    'start': start,
+                    'end': end,
+                    'count': bam.count(chrom, start, end)})
+
+        sample.region_counts = pd.DataFrame(region_counts)
+
+        return sample
+
+    def _get_minor_allele_freq(self, allele_counts):
+        """
+        Return the smallest allele count divided by the total count,
+        or NaN when the total is zero or below ``self.min_coverage``.
+        """
+
+        coverage = sum(allele_counts)
+        enough_reads = coverage >= self.min_coverage and coverage != 0
+
+        if enough_reads:
+            return min(allele_counts) / coverage
+
+        return np.nan
+
+    def _get_genotype_class(self, minor_allele_freq):
+        """
+        Classify a site as 'Hom' or 'Het' from its minor allele
+        frequency.
+
+        A frequency up to HETEROZYGOUS_THRESHOLD counts as 'Hom',
+        anything above it as 'Het'. For a missing frequency the
+        configured default genotype is returned, or NaN when there is
+        no default.
+        """
+
+        if not pd.isna(minor_allele_freq):
+            if minor_allele_freq <= HETEROZYGOUS_THRESHOLD:
+                return 'Hom'
+            return 'Het'
+
+        if self.default_genotype is not None:
+            return self.default_genotype
+
+        return np.nan
+
+    def _get_genotype(self, genotype, allele_counts, alleles):
+        """
+        Spell out the genotype as allele letter(s) (e.g. A, T, AT, GC).
+
+        A missing genotype class gives NaN, 'Het' gives both alleles
+        joined, and anything else gives the allele with the higher
+        count (the second allele on a tie).
+        """
+
+        if pd.isna(genotype):
+            return np.nan
+
+        if genotype == 'Het':
+            return ''.join(alleles)
+
+        first_is_major = allele_counts[0] > allele_counts[1]
+
+        return alleles[0] if first_is_major else alleles[1]
+
+    def _get_genotype_info(self, pileup_site, ref_allele, alt_allele):
+        """
+        Add the genotype fields to a pileup record.
+
+        From the read counts of the reference and alternate allele this
+        fills in 'minor_allele_freq', 'genotype_class' and 'genotype'
+        on ``pileup_site`` and returns that same dict.
+        """
+
+        alleles = [ref_allele, alt_allele]
+        counts = [pileup_site[allele] for allele in alleles]
+
+        freq = self._get_minor_allele_freq(counts)
+        pileup_site['minor_allele_freq'] = freq
+
+        genotype_class = self._get_genotype_class(freq)
+        pileup_site['genotype_class'] = genotype_class
+
+        pileup_site['genotype'] = self._get_genotype(
+            genotype_class, counts, alleles)
+
+        return pileup_site
+
+ def _add_base(self, site, old_base, old_base_qual, new_base,
+ new_base_qual):
+ """
+ This function is for dealing with the various scenarios that can
+ arise when a read pair overlaps and how to handle when the
+ bases mismatch. The 'old_base' refers to the first base observed when
+ computing pileup information (usually the forward read). Then the
+ 'new_base' is from the second read in the overlapping pair.
+
+ Returns a two-item list [base, base_quality] holding the call to
+ keep for the read pair.
+ """
+
+ # nothing recorded yet: take the new observation as is
+ if old_base is None:
+ return [new_base, new_base_qual]
+
+ # both reads agree: keep the first observation
+ if old_base == new_base:
+ return [old_base, old_base_qual]
+
+ # two different, non-N bases: the one equal to the reference
+ # allele wins, otherwise the first observation is kept
+ if old_base != 'N' and new_base != 'N':
+ if new_base == site['ref_allele']:
+ return [new_base, new_base_qual]
+ else:
+ return [old_base, old_base_qual]
+
+ # from here on the bases differ and at least one of them is 'N'
+ if old_base == site['ref_allele']:
+ return [old_base, old_base_qual]
+ elif new_base == site['ref_allele']:
+ return [new_base, new_base_qual]
+ # NOTE(review): the quality is compared as a string against '!';
+ # presumably that is the lowest quality character, in which case
+ # this check always passes - confirm the intended cutoff
+ elif old_base == site['alt_allele'] and old_base_qual >= '!':
+ return [old_base, old_base_qual]
+ elif new_base == site['alt_allele'] and new_base_qual >= '!':
+ return [new_base, new_base_qual]
+ else:
+ # neither base is usable: record an 'N' placeholder
+ return ['N', '&']
+
+    def _pileup(self, bam, site):
+        """
+        Collect the per-site pileup information for one site.
+
+        Every read name contributes a single base: when the same name
+        is seen again the two observations are reconciled with
+        _add_base, and a name already resolved to 'N' stays 'N'. Reads
+        below the minimum mapping quality, reference skips and
+        deletions are ignored.
+
+        Returns a dict with the site description, the number of reads,
+        the matches/mismatches against the reference allele and the
+        count of each of A, C, T, G and N.
+        """
+
+        read_data = {}
+
+        columns = bam.pileup(
+            contig=site['chrom'], start=site['start'], end=site['end'],
+            truncate=True, max_depth=30000, stepper='nofilter',
+            min_base_quality=self.min_base_quality)
+
+        for pileupcolumn in columns:
+
+            for pileupread in pileupcolumn.pileups:
+
+                position = pileupread.query_position
+
+                if position is None:
+                    continue
+
+                alignment = pileupread.alignment
+                mapq = alignment.mapping_quality
+                read_name = alignment.qname
+                base_qual = alignment.qual[position]
+                base = alignment.query_sequence[position]
+
+                # skip the read if its mapping quality is too low
+                # or if the site is part of an indel
+                too_low = mapq < self.min_mapping_quality
+                if too_low or pileupread.is_refskip or pileupread.is_del:
+                    continue
+
+                if read_name not in read_data:
+                    read_data[read_name] = [base, base_qual]
+                    continue
+
+                seen_base, seen_qual = read_data[read_name][0:2]
+
+                if seen_base == 'N':
+                    continue
+
+                merged = self._add_base(
+                    site, seen_base, seen_qual, base, base_qual)
+                read_data[read_name] = merged[0:2]
+
+        allele_counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0, 'N': 0}
+        matches = 0
+
+        for base_data in read_data.values():
+            allele_counts[base_data[0]] += 1
+            if base_data[0] == site['ref_allele']:
+                matches += 1
+
+        total = len(read_data)
+
+        return {
+            'chrom': site['chrom'],
+            'pos': site['end'],
+            'ref': site['ref_allele'],
+            'alt': site['alt_allele'],
+            'reads_all': total,
+            'matches': matches,
+            'mismatches': total - matches,
+            'A': allele_counts['A'],
+            'C': allele_counts['C'],
+            'T': allele_counts['T'],
+            'G': allele_counts['G'],
+            'N': allele_counts['N']
+        }
+
+    def _extract_sites(self, sample):
+        """
+        Compute the pileup and genotype information of every VCF site
+        for one sample.
+
+        The result is stored on ``sample.pileup`` as a table with one
+        row per site. The sample is returned unchanged when no sites
+        were loaded.
+        """
+
+        if not self.sites:
+            return sample
+
+        # collect the rows first and build the table once: appending to
+        # a DataFrame row by row copies the table on every site, and
+        # DataFrame.append no longer exists in pandas 2.x
+        rows = []
+
+        # the context manager closes the BAM handle when done
+        with AlignmentFile(sample.sample_bam) as bam:
+
+            for site in self.sites:
+
+                pileup_site = self._pileup(bam, site)
+
+                rows.append(self._get_genotype_info(
+                    pileup_site, site['ref_allele'], site['alt_allele']))
+
+        pileup = pd.DataFrame(rows, columns=[
+            'chrom', 'pos', 'ref', 'alt', 'reads_all', 'matches', 'mismatches',
+            'A', 'C', 'T', 'G', 'N', 'minor_allele_freq', 'genotype_class',
+            'genotype'])
+
+        for col in ['pos', 'A', 'C', 'T', 'G', 'N', 'matches', 'mismatches', 'reads_all']:
+            pileup[col] = pileup[col].astype(int)
+
+        sample.pileup = pileup
+
+        return sample
+
+    def _extraction_job(self, sample):
+        """
+        Run the complete extraction for a single sample: sites first,
+        then regions, then save the sample to its file.
+
+        Meant to be the unit of work handed to the multiprocessing
+        pool. Returns the processed sample.
+        """
+
+        with_sites = self._extract_sites(sample)
+        processed = self._extract_regions(with_sites)
+
+        processed.save_to_file()
+
+        return processed
+
+    def extract(self, samples):
+        """
+        Extract the pileup and region information for the given
+        samples.
+
+        ``samples`` is either a dict of samples keyed by name or a
+        single sample. Samples whose extraction file already exists are
+        loaded from it (unless overwriting was requested); the others
+        are extracted in a pool of ``self.threads`` worker processes.
+
+        Returns the dict of samples, with the extracted ones replaced
+        by the objects returned from the workers.
+        """
+
+        # isinstance also accepts dict subclasses (e.g. OrderedDict)
+        if not isinstance(samples, dict):
+            samples = {samples.sample_name: samples}
+
+        # determine which samples need to be extracted
+
+        samples_to_extract = []
+
+        for sample in samples.values():
+
+            # if extraction file exists then load it
+
+            if os.path.exists(sample.extraction_file) and not self.overwrite:
+                sample.load_from_file()
+                continue
+
+            samples_to_extract.append(sample)
+
+        # if any samples need to be extracted, then do so
+        # (using multiprocessing)
+
+        if samples_to_extract:
+
+            # the context manager shuts the worker processes down once
+            # map() has returned instead of leaving them behind
+            with Pool(self.threads) as thread_pool:
+                samples_processed = thread_pool.map(
+                    self._extraction_job, samples_to_extract)
+
+            for sample in samples_processed:
+                samples[sample.sample_name] = sample
+
+        return samples
diff --git a/biometrics/genotype.py b/biometrics/genotype.py
new file mode 100644
index 0000000..088702e
--- /dev/null
+++ b/biometrics/genotype.py
@@ -0,0 +1,228 @@
+import os
+from multiprocessing import Pool
+
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+
+from utils import exit_error
+
+EPSILON = 1e-9
+
+
+class Genotyper:
+
+    def __init__(self, no_db_compare, discordance_threshold=0.05, threads=1, zmin=None, zmax=None):
+        """
+        Store the genotyping settings.
+
+        ``zmin`` and ``zmax`` are the colour-scale limits handed to the
+        heatmap. ``sample_type_ratio`` starts at 1 and is updated by
+        genotype().
+        """
+
+        self.no_db_compare = no_db_compare
+        self.discordance_threshold = discordance_threshold
+        self.threads = threads
+        self.zmin, self.zmax = zmin, zmax
+        self.sample_type_ratio = 1
+
+    def are_samples_same_group(self, sample1, sample2):
+        """
+        Tell whether two samples belong to the same sample group.
+
+        Returns True or False, or NaN when either sample has no group.
+        """
+
+        group1, group2 = sample1.sample_group, sample2.sample_group
+
+        if group1 is None or group2 is None:
+            return np.nan
+
+        return bool(group1 == group2)
+
+    def _plot_heatmap(self, data, outdir, name, title="Discordance calculations between samples", size_ratio=None):
+        """
+        Write an HTML heatmap of the discordance rate between reference
+        samples (x axis) and query samples (y axis).
+
+        ``data`` is the comparison table produced by genotype(); the
+        hover text reads its columns by position. Pairs whose status is
+        'Unexpected Match' or 'Unexpected Mismatch' are overlaid with
+        red markers. The figure is saved as ``name`` inside ``outdir``.
+
+        When ``size_ratio`` is given and differs from 1, the figure is
+        1400 pixels wide and its height is scaled by the ratio;
+        otherwise the figure size is left unset.
+        """
+
+        width = None
+        height = None
+
+        if size_ratio is not None and size_ratio != 1:
+            width = 1400
+            height = (width * size_ratio)/4
+
+        # One hover line per field, separated by <br>.
+        # NOTE(review): the line-break markup of these literals was lost
+        # (the strings were split across lines); '<br>' and the closing
+        # '<extra></extra>' are reconstructed - confirm against the
+        # intended hover layout.
+        hovertemplate = (
+            'Reference sample: %{customdata[0]}' +
+            '<br>Query sample: %{customdata[1]}' +
+            '<br>Homozygous count in reference: %{customdata[3]}' +
+            '<br>Total match count: %{customdata[4]}' +
+            '<br>Homozygous match count: %{customdata[5]}' +
+            '<br>Heterozygous match count: %{customdata[6]}' +
+            '<br>Homozygous mismatch count: %{customdata[7]}' +
+            '<br>Heterozygous mismatch count: %{customdata[8]}' +
+            '<br>Discordance rate: %{customdata[9]}' +
+            '<br>Status: %{customdata[12]}' +
+            '<extra></extra>')
+
+        fig = go.Figure()
+        fig.add_trace(
+            go.Heatmap(
+                x=data['ReferenceSample'],
+                y=data['QuerySample'],
+                z=data['DiscordanceRate'],
+                legendgroup="Discordance",
+                name='Discordance',
+                customdata=data.to_numpy(),
+                hovertemplate=hovertemplate,
+                zmin=self.zmin,
+                zmax=self.zmax,
+                colorscale='Blues_r'
+            ))
+
+        # add red dots to sample pairs that are unexpected match/mismatch
+
+        data_sub = data[(data['Status']=='Unexpected Match') | (data['Status']=='Unexpected Mismatch')].copy()
+
+        if len(data_sub) > 0:
+            fig.add_trace(
+                go.Scatter(
+                    mode="markers",
+                    x=data_sub['ReferenceSample'],
+                    y=data_sub['QuerySample'],
+                    marker_symbol=[17],
+                    marker_color="red",
+                    marker_line_width=0,
+                    marker_size=10,
+                    customdata=data_sub.to_numpy(),
+                    hovertemplate='%{customdata[12]}'))
+
+        fig.update_layout(
+            yaxis_title="Query samples",
+            xaxis_title="Reference samples",
+            legend_title_text="Discordance",
+            title_text=title,
+            width=width, height=height)
+        fig.write_html(os.path.join(outdir, name))
+
+    def plot(self, data, outdir):
+        """
+        Write the genotype comparison heatmaps into ``outdir``.
+
+        One heatmap covers the comparisons among the input samples, a
+        second one the comparisons of input samples with database
+        samples. Each is only drawn when it has more than one row; the
+        discordance rate is rounded to 4 decimals for display.
+        """
+
+        def rounded_subset(mask):
+            subset = data[mask].copy()
+            subset['DiscordanceRate'] = subset['DiscordanceRate'].map(
+                lambda x: round(x, 4))
+            return subset
+
+        # make plot for comparing input samples with each other
+
+        input_only = rounded_subset(~data['DatabaseComparison'])
+
+        if input_only.shape[0] > 1:
+            self._plot_heatmap(
+                input_only, outdir, name='genotype_comparison_input_only.html',
+                title="Discordance calculations between input samples")
+
+        # make plot for comparing input samples with database samples
+
+        with_database = rounded_subset(data['DatabaseComparison'])
+
+        if with_database.shape[0] > 1:
+            self._plot_heatmap(
+                with_database, outdir, name='genotype_comparison_database.html',
+                title="Discordance calculations between input samples and database samples",
+                size_ratio=self.sample_type_ratio)
+
+    def _compute_discordance(self, samples):
+        """
+        Count genotype agreements and disagreements for one pair of
+        samples.
+
+        ``samples`` is a (reference sample, query sample) pair. The
+        pileup tables of both are compared row by row. Returns a dict
+        with both sample names and the counts used later to derive the
+        discordance rate.
+        """
+
+        reference_sample, query_sample = samples
+
+        ref_class = reference_sample.pileup['genotype_class']
+        query_class = query_sample.pileup['genotype_class']
+        ref_genotype = reference_sample.pileup['genotype']
+        query_genotype = query_sample.pileup['genotype']
+
+        same_class = ref_class == query_class
+        ref_hom = ref_class == 'Hom'
+        ref_het = ref_class == 'Het'
+        query_hom = query_class == 'Hom'
+        query_het = query_class == 'Het'
+
+        return {
+            'ReferenceSample': reference_sample.sample_name,
+            'QuerySample': query_sample.sample_name,
+            'HomozygousInRef': sum(ref_hom),
+            'TotalMatch': sum(same_class),
+            'HomozygousMatch': sum(same_class & ref_hom),
+            'HeterozygousMatch': sum(same_class & ref_het),
+            'HomozygousMismatch': sum(
+                (ref_genotype != query_genotype) & (ref_hom & query_hom)),
+            'HeterozygousMismatch': sum(
+                (ref_class != query_class) & (ref_het | query_het)),
+        }
+
+    def genotype(self, samples):
+        """
+        Compare the genotypes of the samples pairwise.
+
+        ``samples`` is a dict of samples keyed by name. Samples with a
+        true ``query_group`` are treated as database samples, the rest
+        as input samples. Input samples are compared with each other
+        (when there is more than one) and, unless database comparison
+        is disabled, with every database sample.
+
+        Returns a table with one row per compared pair holding the
+        match/mismatch counts, the discordance rate (NaN when fewer
+        than 10 sites are homozygous in the reference sample), whether
+        the pair matched, whether a match was expected from the sample
+        groups, and the resulting status.
+
+        An unsuitable number of samples is reported through exit_error.
+        """
+
+        data = []
+        samples_db = {
+            name: sample for name, sample in samples.items()
+            if sample.query_group}
+        samples_input = {
+            name: sample for name, sample in samples.items()
+            if not sample.query_group}
+
+        # get the number of each type of sample and compute a ratio
+        # this is used to plot the heatmap when comparing with database
+        # samples
+
+        sample_n_db = len(samples_db)
+        sample_n_input = len(samples_input)
+
+        if sample_n_db > 0 and sample_n_input > 0 and sample_n_db > sample_n_input:
+            self.sample_type_ratio = sample_n_db / sample_n_input
+
+        # check to see if there are appropriate number of samples to
+        # do the analysis
+
+        if self.no_db_compare:
+            if sample_n_input <= 1:
+                exit_error("You need to specify 2 or more samples in order to compare genotypes.")
+        else:
+            if sample_n_input <= 1 and sample_n_db < 1:
+                exit_error("There are no samples in the database to compare with")
+
+        # the context manager shuts the worker processes down once all
+        # comparisons are done instead of leaving the pool open
+        with Pool(self.threads) as thread_pool:
+
+            if sample_n_input > 1:
+                # compare all the input samples to each other
+
+                jobs = [
+                    [samples[sample_name1], samples[sample_name2]]
+                    for sample_name1 in samples_input
+                    for sample_name2 in samples_input]
+
+                results = thread_pool.map(self._compute_discordance, jobs)
+
+                for result in results:
+                    result['DatabaseComparison'] = False
+                data += results
+
+            # for each input sample, compare with all the samples in the db
+
+            if not self.no_db_compare and sample_n_db > 0:
+
+                jobs = [
+                    [samples[sample_name1], samples[sample_name2]]
+                    for sample_name1 in samples_input
+                    for sample_name2 in samples_db]
+
+                results = thread_pool.map(self._compute_discordance, jobs)
+
+                for result in results:
+                    result['DatabaseComparison'] = True
+                data += results
+
+        data = pd.DataFrame(data)
+
+        # compute discordance rate (EPSILON avoids a division by zero)
+
+        data['DiscordanceRate'] = data['HomozygousMismatch'] / (data['HomozygousInRef'] + EPSILON)
+        data.loc[data['HomozygousInRef'] < 10, 'DiscordanceRate'] = np.nan
+
+        # for each comparison, indicate if the match/mismatch is expected
+        # or not expected
+
+        data['Matched'] = data['DiscordanceRate'] < self.discordance_threshold
+        data['ExpectedMatch'] = data.apply(
+            lambda x: self.are_samples_same_group(
+                samples[x['ReferenceSample']],
+                samples[x['QuerySample']]), axis=1)
+
+        # NOTE(review): ExpectedMatch holds NaN for samples without a
+        # group; confirm the boolean masks below behave as intended then
+        data['Status'] = ''
+        data.loc[data.Matched & data.ExpectedMatch, 'Status'] = "Expected Match"
+        data.loc[data.Matched & ~data.ExpectedMatch, 'Status'] = "Unexpected Match"
+        data.loc[~data.Matched & data.ExpectedMatch, 'Status'] = "Unexpected Mismatch"
+        data.loc[~data.Matched & ~data.ExpectedMatch, 'Status'] = "Expected Mismatch"
+
+        data = data[[
+            'ReferenceSample', 'QuerySample', 'DatabaseComparison',
+            'HomozygousInRef', 'TotalMatch', 'HomozygousMatch',
+            'HeterozygousMatch', 'HomozygousMismatch', 'HeterozygousMismatch',
+            'DiscordanceRate', 'Matched', 'ExpectedMatch', 'Status']]
+
+        return data
diff --git a/biometrics/major_contamination.py b/biometrics/major_contamination.py
new file mode 100644
index 0000000..85a2efc
--- /dev/null
+++ b/biometrics/major_contamination.py
@@ -0,0 +1,100 @@
+import os
+
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+
+
+class MajorContamination():
+ """
+ Major contamination.
+ """
+
+ def __init__(self, threshold):
+ self.threshold = threshold
+
+ def to_dataframe(self, samples):
+
+ data = pd.DataFrame(
+ columns=['sample_name', 'sample_group', 'sample_sex', 'sample_type',
+ 'total_sites', 'total_heterozygous_sites',
+ 'major_contamination'])
+
+ for sample_name, sample in samples.items():
+
+ row = {
+ 'sample_name': sample.sample_name,
+ 'sample_group': sample.sample_group,
+ 'sample_sex': sample.sample_sex,
+ 'sample_type': sample.sample_type,
+ 'total_sites': sample.metrics['total_sites'],
+ 'total_heterozygous_sites': sample.metrics['total_heterozygous_sites'],
+ 'major_contamination': sample.metrics['major_contamination']
+ }
+
+ data = data.append(row, ignore_index=True)
+
+ return data
+
+ def plot(self, data, outdir):
+ """
+ Plot major contamination data.
+ """
+
+ ymax = max(self.threshold, max(data['major_contamination'])) * 1.05
+ data['major_contamination'] = data['major_contamination'].map(
+ lambda x: round(x, 4))
+
+ fig = go.Figure()
+ fig.add_trace(
+ go.Bar(
+ x=data['sample_name'],
+ y=data['major_contamination'],
+ customdata=data.to_numpy(),
+ hovertemplate='Sample group: %{customdata[1]}' +
+ '
Sample name: %{customdata[0]}' +
+ '
Sample sex: %{customdata[2]}' +
+ '
Sample type: %{customdata[3]}' +
+ '
Total sites: %{customdata[5]}' +
+ '
Total heterozygous sites: %{customdata[6]}' +
+ '
Major contamination: %{y:E}' +
+ '',
+ ))
+ fig.update_layout(
+ yaxis_title="Major contamination",
+ title_text="Major contamination across samples",
+ yaxis=dict(range=[0, ymax]))
+ fig.add_shape(
+ type='line',
+ x0=-1,
+ y0=self.threshold,
+ x1=data.shape[0],
+ y1=self.threshold,
+ line=dict(color='Red'),
+ xref='x',
+ yref='y')
+
+ fig.write_html(os.path.join(outdir, 'major_contamination.html'))
+
+ def estimate(self, samples):
+ """
+ Estimate major contamination.
+ """
+
+ for sample_name, sample in samples.items():
+
+ sites = sample.pileup
+ sites_notna = sites[~pd.isna(sites['genotype_class'])]
+
+ het_sites = sites_notna[sites_notna['genotype_class'] == 'Het']
+
+ sample.metrics['total_sites'] = len(sites_notna)
+ sample.metrics['total_heterozygous_sites'] = len(het_sites)
+
+ if sample.metrics['total_sites'] == 0:
+ sample.metrics['major_contamination'] = np.nan
+ else:
+ sample.metrics['major_contamination'] = \
+ len(het_sites) / len(sites_notna)
+
+ return samples
diff --git a/biometrics/minor_contamination.py b/biometrics/minor_contamination.py
new file mode 100644
index 0000000..f6d2454
--- /dev/null
+++ b/biometrics/minor_contamination.py
@@ -0,0 +1,97 @@
+import os
+
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+
+
+class MinorContamination():
+ """
+ Minor contamination.
+ """
+
+ def __init__(self, threshold):
+ self.threshold = threshold
+
+ def to_dataframe(self, samples):
+
+ data = pd.DataFrame(
+ columns=['sample_name', 'sample_group', 'sample_sex', 'sample_type',
+ 'total_homozygous_sites', 'minor_contamination'])
+
+ for sample_name, sample in samples.items():
+
+ row = {
+ 'sample_name': sample.sample_name,
+ 'sample_group': sample.sample_group,
+ 'sample_sex': sample.sample_sex,
+ 'sample_type': sample.sample_type,
+ 'total_homozygous_sites': sample.metrics['total_homozygous_sites'],
+ 'minor_contamination': sample.metrics['minor_contamination']
+ }
+
+ data = data.append(row, ignore_index=True)
+
+ return data
+
+ def plot(self, data, outdir):
+ """
+ Plot minor contamination data.
+ """
+
+ ymax = max(self.threshold, max(data['minor_contamination'])) * 1.05
+ data['minor_contamination'] = data['minor_contamination'].map(
+ lambda x: round(x, 4))
+
+ fig = go.Figure()
+ fig.add_trace(
+ go.Bar(
+ x=data['sample_name'],
+ y=data['minor_contamination'],
+ customdata=data.to_numpy(),
+ hovertemplate='Sample group: %{customdata[1]}' +
+ '
Sample name: %{customdata[0]}' +
+ '
Sample sex: %{customdata[2]}' +
+ '
Sample type: %{customdata[3]}' +
+ '
Total homozygous sites: %{customdata[4]}' +
+ '
Minor contamination: %{y:E}' +
+ ''))
+
+ fig.update_layout(
+ yaxis_title="Minor contamination",
+ title_text="Minor contamination across samples",
+ yaxis=dict(range=[0, ymax]))
+
+ fig.add_shape(
+ type='line',
+ x0=-1,
+ y0=self.threshold,
+ x1=data.shape[0],
+ y1=self.threshold,
+ line=dict(color='Red'),
+ xref='x',
+ yref='y')
+
+ fig.write_html(os.path.join(outdir, 'minor_contamination.html'))
+
+ def estimate(self, samples):
+ """
+ Estimate minor contamination.
+ """
+
+ for sample_name, sample in samples.items():
+
+ sites = sample.pileup
+ sites_notna = sites[~pd.isna(sites['genotype_class'])]
+
+ hom_sites = sites_notna[sites_notna['genotype_class'] == 'Hom']
+
+ sample.metrics['total_homozygous_sites'] = len(hom_sites)
+
+ if sample.metrics['total_homozygous_sites'] == 0:
+ sample.metrics['minor_contamination'] = np.nan
+ else:
+ sample.metrics['minor_contamination'] = \
+ hom_sites['minor_allele_freq'].mean()
+
+ return samples
diff --git a/biometrics/sample.py b/biometrics/sample.py
new file mode 100644
index 0000000..5530eee
--- /dev/null
+++ b/biometrics/sample.py
@@ -0,0 +1,76 @@
+import pickle
+import os
+
+import pandas as pd
+from utils import exit_error
+
+
+class Sample:
+ """
+ Class to hold information related to a single sample.
+ """
+
+ def __init__(self, sample_name=None, sample_bam=None, sample_group=None,
+ sample_sex=None, sample_type=None, db=None, query_group=False):
+ self.sample_bam = sample_bam
+ self.sample_name = sample_name
+ self.sample_sex = sample_sex
+ self.sample_group = sample_group
+ self.sample_type = sample_type
+
+ self.pileup = None
+ self.region_counts = None
+ self.extraction_file = None
+ self.query_group = query_group
+ self.metrics = {}
+
+ if db is not None and self.sample_name is not None:
+ self.extraction_file = os.path.join(db, self.sample_name + '.pk')
+
+ def save_to_file(self):
+
+ pileup_data = self.pileup.to_dict("records")
+
+ if self.region_counts is not None:
+ region_counts = self.region_counts.to_dict('records')
+ else:
+ region_counts = None
+
+ sample_data = {
+ 'sample_bam': self.sample_bam,
+ 'sample_name': self.sample_name,
+ 'sample_sex': self.sample_sex,
+ 'sample_group': self.sample_group,
+ 'sample_type': self.sample_type,
+ 'pileup_data': pileup_data,
+ 'region_counts': region_counts
+ }
+
+ pickle.dump(sample_data, open(self.extraction_file, "wb"))
+
+ def load_from_file(self, extraction_file=None):
+
+ if extraction_file is not None:
+ self.extraction_file = extraction_file
+
+ if self.extraction_file is None:
+ exit_error('Extraction file path is None.')
+
+ if not os.path.exists(self.extraction_file):
+ exit_error('Extraction file does not exist: {}'.format(
+ self.extraction_file))
+
+ sample_data = pickle.load(open(self.extraction_file, "rb"))
+
+ region_counts = None
+ if sample_data.get('region_counts') is not None:
+ region_counts = pd.DataFrame(
+ sample_data['region_counts'], dtype=object)
+
+ self.pileup = pd.DataFrame(sample_data['pileup_data'])
+ self.sample_bam = sample_data['sample_bam']
+ self.sample_name = sample_data['sample_name']
+ self.sample_sex = sample_data['sample_sex']
+ self.sample_group = sample_data['sample_group']
+ self.sample_type = sample_data['sample_type']
+ self.region_counts = region_counts
diff --git a/biometrics/sex_mismatch.py b/biometrics/sex_mismatch.py
new file mode 100644
index 0000000..a00d028
--- /dev/null
+++ b/biometrics/sex_mismatch.py
@@ -0,0 +1,46 @@
+import pandas as pd
+import numpy as np
+
+
+class SexMismatch:
+ """
+ Class to detect sex mismatch
+ """
+
+ def __init__(self, threshold):
+ self.threshold = threshold
+
+ def predict_sex(self, sample):
+
+ if sample.region_counts is None:
+ return np.nan
+
+ total_count = sample.region_counts['count'].sum()
+
+ predicted_sex = 'M' if total_count > self.threshold else 'F'
+
+ return predicted_sex
+
+ def detect_mismatch(self, samples):
+
+ results = []
+
+ for i, sample_name in enumerate(samples):
+
+ sample = samples[sample_name]
+
+ predicted_sex = self.predict_sex(sample)
+
+ results.append({
+ 'sample': sample_name,
+ 'expected_sex': sample.sample_sex,
+ 'predicted_sex': predicted_sex
+ })
+
+ results = pd.DataFrame(results)
+
+ results['sex_mismatch'] = \
+ (results['expected_sex'] != results['predicted_sex']).astype(str)
+ results.loc[pd.isna(results['predicted_sex']), 'sex_mismatch'] = np.nan
+
+ return results
diff --git a/biometrics/utils.py b/biometrics/utils.py
new file mode 100644
index 0000000..7fb05ca
--- /dev/null
+++ b/biometrics/utils.py
@@ -0,0 +1,23 @@
+import sys
+
+
+def exit_error(msg):
+ print("ERROR: {}".format(msg))
+ sys.exit(1)
+
+
+def standardize_sex_nomenclature(val):
+
+ if val is None:
+ return None
+
+ # Potential inputs
+ female = ['female', 'f', 'Female', 'F']
+ male = ['Male', 'M', 'male', 'm']
+
+ if val in female:
+ return 'F'
+ elif val in male:
+ return 'M'
+
+ return None
diff --git a/docs/.gitbook/assets/genotype_comparison_database.html b/docs/.gitbook/assets/genotype_comparison_database.html
new file mode 100644
index 0000000..c3d7ff3
--- /dev/null
+++ b/docs/.gitbook/assets/genotype_comparison_database.html
@@ -0,0 +1,67 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/.gitbook/assets/genotype_comparison_input_only.html b/docs/.gitbook/assets/genotype_comparison_input_only.html
new file mode 100644
index 0000000..2735059
--- /dev/null
+++ b/docs/.gitbook/assets/genotype_comparison_input_only.html
@@ -0,0 +1,67 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/.gitbook/assets/major_contamination.html b/docs/.gitbook/assets/major_contamination.html
new file mode 100644
index 0000000..3f61624
--- /dev/null
+++ b/docs/.gitbook/assets/major_contamination.html
@@ -0,0 +1,67 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/.gitbook/assets/minor_contamination.html b/docs/.gitbook/assets/minor_contamination.html
new file mode 100644
index 0000000..a55d203
--- /dev/null
+++ b/docs/.gitbook/assets/minor_contamination.html
@@ -0,0 +1,67 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 556165f..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = python -msphinx
-SPHINXPROJ = biometrics
-SOURCEDIR = .
-BUILDDIR = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..14c1014
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,14 @@
+# biometrics
+
+Python package for calculating sample contamination metrics and sample sex verification.
+
+[![Build Status](https://travis-ci.com/msk-access/biometrics.svg?branch=master)](https://travis-ci.com/msk-access/biometrics) [![PyPi](https://img.shields.io/pypi/v/biometrics.svg?)](https://pypi.python.org/pypi/biometrics)
+
+* Free software: Apache Software License 2.0
+* Documentation: [https://msk-access.gitbook.io/biometrics/](https://msk-access.gitbook.io/biometrics/)
+
+## Installation
+
+From pypi:
+
+`pip install biometrics`
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
new file mode 100644
index 0000000..bf21c59
--- /dev/null
+++ b/docs/SUMMARY.md
@@ -0,0 +1,9 @@
+# Table of contents
+
+* [biometrics](README.md)
+* [Introduction](introduction.md)
+* [Extraction](extraction.md)
+* [Genotype](genotype.md)
+* [Minor contamination](minor-contamination.md)
+* [Major contamination](major-contamination.md)
+* [Sex mismatch](sex-mismatch.md)
diff --git a/docs/authors.rst b/docs/authors.rst
deleted file mode 100644
index e122f91..0000000
--- a/docs/authors.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../AUTHORS.rst
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100755
index 844b255..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/usr/bin/env python
-#
-# biometrics documentation build configuration file, created by
-# sphinx-quickstart on Fri Jun 9 13:47:02 2017.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another
-# directory, add these directories to sys.path here. If the directory is
-# relative to the documentation root, use os.path.abspath to make it
-# absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('..'))
-
-import biometrics
-
-# -- General configuration ---------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = 'biometrics'
-copyright = "2019, Ronak Shah"
-author = "Ronak Shah"
-
-# The version info for the project you're documenting, acts as replacement
-# for |version| and |release|, also used in various other places throughout
-# the built documents.
-#
-# The short X.Y version.
-version = biometrics.__version__
-# The full version, including alpha/beta/rc tags.
-release = biometrics.__version__
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output -------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'alabaster'
-
-# Theme options are theme-specific and customize the look and feel of a
-# theme further. For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-
-# -- Options for HTMLHelp output ---------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'biometricsdoc'
-
-
-# -- Options for LaTeX output ------------------------------------------
-
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- #
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- #
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- #
- # 'preamble': '',
-
- # Latex figure (float) alignment
- #
- # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass
-# [howto, manual, or own class]).
-latex_documents = [
- (master_doc, 'biometrics.tex',
- 'biometrics Documentation',
- 'Ronak Shah', 'manual'),
-]
-
-
-# -- Options for manual page output ------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'biometrics',
- 'biometrics Documentation',
- [author], 1)
-]
-
-
-# -- Options for Texinfo output ----------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'biometrics',
- 'biometrics Documentation',
- author,
- 'biometrics',
- 'One line description of project.',
- 'Miscellaneous'),
-]
-
-
-
diff --git a/docs/contributing.rst b/docs/contributing.rst
deleted file mode 100644
index e582053..0000000
--- a/docs/contributing.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../CONTRIBUTING.rst
diff --git a/docs/extraction.md b/docs/extraction.md
new file mode 100644
index 0000000..6aba6e0
--- /dev/null
+++ b/docs/extraction.md
@@ -0,0 +1,50 @@
+---
+description: Step for preparing the BAM file(s)
+---
+
+## Extract
+Running this step is a **prerequisite** before running any of the other tools. This step extracts the pileup and coverage information from your BAM file(s) and stores the result in a Python pickle file (which contains JSON-like data: the sample annotation plus the pileup and region-count records). You can determine where to store the output files by specifying the `-db` argument. This allows for much faster analyses that make repeated use of your samples.
+
+There are two main types of required input:
+
+- **Sample information:** the BAM file and any associated annotation (e.g. sample grouping).
+- **Supporting files:** reference FASTA, VCF, and BED file.
+
+Moreover, there are two ways to provide the sample information: (1) provide a CSV file, or (2) specify via the command line arguments.
+
+#### CSV input
+This method is easier when you have many samples. Just provide a CSV file with five columns: sample name, sample group, sample type, sample sex, and path to the sample's BAM file. An example with three samples is below:
+
+```text
+sample_name,sample_group,sample_type,sample_sex,sample_bam
+C-48665L-N001-d,C-48665L,Normal,F,/path/to/C-48665L-N001-d.bam
+C-PCYP90-N001-d,C-PCYP90,Normal,M,/path/to/C-PCYP90-N001-d.bam
+C-MH6AL9-N001-d,C-MH6AL9,Normal,F,/path/to/C-MH6AL9-N001-d.bam
+```
+
+Here is an example command line usage for three samples:
+
+```shell
+biometrics extract \
+ -i inputs.csv \
+ --vcf /path/to/vcf \
+ --bed /path/to/bed/file \
+ -db /path/to/store/extract/output \
+ -f /path/to/reference.fasta
+```
+
+#### Command line input
+You can also specify each of your samples via command line arguments. Here is an example:
+
+```shell
+biometrics extract \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ -sb /path/to/C-48665L-N001-d.bam /path/to/C-PCYP90-N001-d.bam /path/to/C-MH6AL9-N001-d.bam \
+ -st Normal Normal Normal \
+ -ss F M F \
+ -sg C-48665L C-PCYP90 C-MH6AL9 \
+ --vcf /path/to/vcf \
+ --bed /path/to/bed/file \
+ -db /path/to/store/extract/output \
+ -f /path/to/reference.fasta
+```
diff --git a/docs/genotype.md b/docs/genotype.md
new file mode 100644
index 0000000..ad8fffb
--- /dev/null
+++ b/docs/genotype.md
@@ -0,0 +1,81 @@
+---
+description: For finding sample matches and mismatches.
+---
+
+# Genotype
+Compares each sample against each other to verify expected sample matches and identify any unexpected matches or mismatches. Running these comparisons requires the extracted pileup information to compute a discordance score between each pair of samples. The documentation below details the different ways to run this analysis, the output, and the methods behind them.
+
+### How to run the tool
+You need one or more samples to run this analysis. However, if you supply just one sample then it is assumed you have samples already in the database to compare with. There are two required inputs: (1) the names of the sample(s) you want to compare (referred to as `input samples` below), and (2) the database (biometrics will automatically load all sample data from the database). Moreover, there are two types of comparisons that are performed when running the tool:
+
+##### (1) Compares your input samples with each other
+This only runs if you supplied two or more input samples. The easiest way to indicate which samples you want to compare is to give the sample names via the `--sample-name` flag. Below is an example command:
+
+```shell
+biometrics genotype \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ -db /path/to/store/extract/output
+```
+
+You can also indicate your input samples via a CSV file, which has the same format as what you provided to the extraction tool, but you only need the `sample_name` column:
+
+```shell
+biometrics genotype \
+ -i samples.csv \
+ -db /path/to/store/extract/output
+```
+
+##### (2) Compares your input samples with remaining database samples
+The second analysis will compare each of your input samples with all remaining samples in the database. However, if you wish to disable this step and not do the comparison then you can supply the `--no-db-compare` flag:
+
+```shell
+biometrics genotype \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ --no-db-compare \
+ -db /path/to/store/extract/output
+```
+
+### Output
+
+All analyses output a CSV file containing the metrics from comparing each sample. An interactive heatmap can also optionally be produced by supplying the `--plot` flag. These outputs are saved either to the current working directory or to a folder you specify via `--outdir`.
+
+##### CSV file
+The CSV file contains metrics for each pair of samples compared (one on each line). Table 1 below provides a description of each column.
+
+| Column Name | Description |
+| :--- | :--- |
+| ReferenceSample | First sample in the comparison. |
+| QuerySample | Second sample in the comparison. |
+| DatabaseComparison | True if the comparison was between one of your input samples and one in the database. |
+| HomozygousInRef | Number of homozygous sites in the ReferenceSample. |
+| TotalMatch | Total sites that match (homozygous and heterozygous). |
+| HomozygousMatch | Number of homozygous sites that match. |
+| HeterozygousMatch | Number of heterozygous sites that match. |
+| HomozygousMismatch | Number of mismatching homozygous sites. |
+| HeterozygousMismatch | Number of mismatching heterozygous sites. |
+| DiscordanceRate | Discordance rate metric. |
+| Matched | True if ReferenceSample and QuerySample have DiscordanceRate less than the threshold (default 0.05). |
+| ExpectedMatch | True if the sample pair is expected to match. |
+| Status | Takes one of the following: Expected Match, Unexpected Match, Unexpected Mismatch, or Expected Mismatch. |
+
+##### Interactive plot
+
+Below are the two figures that are outputted from the two types of comparisons that are done. Samples that are unexpected matches or mismatches will be marked with a red star in the heatmap.
+
+![](.gitbook/assets/genotype_comparison_input_only.html)
+
+![](.gitbook/assets/genotype_comparison_database.html)
+
+### Algorithm details
+
+Any samples with a discordance rate of 5% or higher are considered mismatches.
+
+$$
+\[\\
+Discordance\ Rate = \frac{\text{Number of homozygous sites in Reference that mismatch in Query}}{\text{Number of SNP sites homozygous in Reference}}\\
+\]\\
+$$
+
+{% hint style="info" %}
+If there are fewer than 10 common homozygous sites, the discordance rate cannot be calculated since this is a strong indication that coverage is too low and the samples failed other QC.
+{% endhint %}
diff --git a/docs/history.rst b/docs/history.rst
deleted file mode 100644
index 2506499..0000000
--- a/docs/history.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../HISTORY.rst
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 1ae12c4..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-Welcome to biometrics's documentation!
-======================================
-
-.. toctree::
- :maxdepth: 2
- :caption: Contents:
-
- readme
- installation
- usage
- modules
- contributing
- authors
- history
-
-Indices and tables
-==================
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/installation.rst b/docs/installation.rst
deleted file mode 100644
index f413691..0000000
--- a/docs/installation.rst
+++ /dev/null
@@ -1,51 +0,0 @@
-.. highlight:: shell
-
-============
-Installation
-============
-
-
-Stable release
---------------
-
-To install biometrics, run this command in your terminal:
-
-.. code-block:: console
-
- $ pip install biometrics
-
-This is the preferred method to install biometrics, as it will always install the most recent stable release.
-
-If you don't have `pip`_ installed, this `Python installation guide`_ can guide
-you through the process.
-
-.. _pip: https://pip.pypa.io
-.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
-
-
-From sources
-------------
-
-The sources for biometrics can be downloaded from the `Github repo`_.
-
-You can either clone the public repository:
-
-.. code-block:: console
-
- $ git clone git://github.com/rhshah/biometrics
-
-Or download the `tarball`_:
-
-.. code-block:: console
-
- $ curl -OJL https://github.com/rhshah/biometrics/tarball/master
-
-Once you have a copy of the source, you can install it with:
-
-.. code-block:: console
-
- $ python setup.py install
-
-
-.. _Github repo: https://github.com/rhshah/biometrics
-.. _tarball: https://github.com/rhshah/biometrics/tarball/master
diff --git a/docs/introduction.md b/docs/introduction.md
new file mode 100644
index 0000000..5b9d988
--- /dev/null
+++ b/docs/introduction.md
@@ -0,0 +1,32 @@
+---
+description: Basics on the usage of biometrics
+---
+
+# Introduction to Biometrics
+
+Biometrics is a Python package to compute various metrics for assessing sample contamination, sample swaps, and sample sex validation. The package is composed of five tools (see below). All the tools (except the sex mismatch one) depend on you providing a VCF file of SNPs to use for computing the metrics. The sex mismatch tool requires you to provide a BED file containing the Y chromosome regions of interest.
+
+#### Extract
+Running this step is **required** before running any of the other four tools. This step extracts the pileup and coverage information from your BAM file(s) and stores the result in a file. The file can then be accessed not just for your initial analysis but for all subsequent analyses that make use of the sample. This provides a significant speed boost to running the four downstream biometrics tools.
+
+Click [here](extraction.md) to read more about this tool.
+
+#### Genotype
+Compares each sample against each other to verify expected sample matches and identify any unexpected matches or mismatches. Relies on computing a discordance score between each pair of samples.
+
+Click [here](genotype.md) to read more about this tool.
+
+#### Minor contamination
+Minor contamination check is done to see if a patient’s sample is contaminated with a little DNA from unrelated individuals.
+
+Click [here](minor-contamination.md) to read more about this tool.
+
+#### Major contamination
+Major contamination check is done to see if a patient’s sample is contaminated with DNA from unrelated individuals.
+
+Click [here](major-contamination.md) to read more about this tool.
+
+#### Sex mismatch
+Used to determine if the predicted sex mismatches the expected sex for a given sample.
+
+Click [here](sex-mismatch.md) to read more about this tool.
diff --git a/docs/major-contamination.md b/docs/major-contamination.md
new file mode 100644
index 0000000..d96fadb
--- /dev/null
+++ b/docs/major-contamination.md
@@ -0,0 +1,50 @@
+---
+description: Calculate major contamination
+---
+
+# Major contamination
+Major contamination is a metric to see if a sample is contaminated with DNA from another unrelated sample. The metric requires the extracted pileup information from running the `extract` tool.
+
+### How to run the tool
+You can run this tool with one or more samples. The two required inputs are the list of sample names and the database (biometrics will automatically load all sample data from the database). Below is an example command:
+
+```shell
+biometrics major \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ -db /path/to/store/extract/output
+```
+
+You can also indicate your input samples via a CSV file, which has the same format as what you provided to the extraction tool, but you only need the `sample_name` column:
+
+```shell
+biometrics major \
+ -i samples.csv \
+ -db /path/to/store/extract/output
+```
+
+### Output
+
+All analyses output a CSV file containing the metrics for each sample. An interactive bar graph can also optionally be produced by supplying the `--plot` flag. These outputs are saved either to the current working directory or to a folder you specify via `--outdir`.
+
+##### CSV file
+The CSV file contains metrics for each sample (one per line). The table below describes each column in the CSV output:
+
+| Column Name | Description |
+| :--- | :--- |
+| sample_name | Sample name. |
+| sample_group | Sample group (if available). |
+| sample_sex | Sample sex (if available). |
+| sample_type | Sample type (if available). |
+| total_sites | Total number of sites. |
+| total_heterozygous_sites | Total number of heterozygous sites. |
+| major_contamination | Major contamination metric. |
+
+
+##### Interactive plot
+Below is an example bar plot showing the per-sample major contamination metrics. You can hover over each bar to get more information about the sample. You can also control the major contamination threshold (the horizontal red line) via the `--major-threshold` flag.
+
+![](.gitbook/assets/major_contamination.html)
+
+### Algorithm details
+
+Major contamination is calculated as the number of heterozygous sites divided by the total number of sites. A heterozygous site is defined as one with > 10% minor allele frequency.
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 1e8cd70..0000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,36 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=python -msphinx
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-set SPHINXPROJ=biometrics
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The Sphinx module was not found. Make sure you have Sphinx installed,
- echo.then set the SPHINXBUILD environment variable to point to the full
- echo.path of the 'sphinx-build' executable. Alternatively you may add the
- echo.Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-
-:end
-popd
diff --git a/docs/minor-contamination.md b/docs/minor-contamination.md
new file mode 100644
index 0000000..d1fa926
--- /dev/null
+++ b/docs/minor-contamination.md
@@ -0,0 +1,49 @@
+---
+description: Calculate minor contamination
+---
+
+# Minor contamination
+Minor contamination is a metric to see if a sample is contaminated with small amounts of DNA from another unrelated sample. The metric requires the extracted pileup information from running the `extract` tool.
+
+### How to run the tool
+You can run this tool with one or more samples. The two required inputs are the list of sample names and the database (biometrics will automatically load all sample data from the database). Below is an example command:
+
+```shell
+biometrics minor \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ -db /path/to/store/extract/output
+```
+
+You can also indicate your input samples via a CSV file, which has the same format as what you provided to the extraction tool, but you only need the `sample_name` column:
+
+```shell
+biometrics minor \
+ -i samples.csv \
+ -db /path/to/store/extract/output
+```
+
+### Output
+
+All analyses output a CSV file containing the metrics for each sample. An interactive bar graph can also optionally be produced by supplying the `--plot` flag. These outputs are saved either to the current working directory or to a folder you specify via `--outdir`.
+
+##### CSV file
+The CSV file contains metrics for each sample (one per line). The table below describes each column in the CSV output:
+
+| Column Name | Description |
+| :--- | :--- |
+| sample_name | Sample name. |
+| sample_group | Sample group (if available). |
+| sample_sex | Sample sex (if available). |
+| sample_type | Sample type (if available). |
+| total_homozygous_sites | Total number of homozygous sites. |
+| minor_contamination | Minor contamination metric. |
+
+
+##### Interactive plot
+Below is an example bar plot showing the per-sample minor contamination metrics. You can hover over each bar to get more information about the sample. You can also control the minor contamination threshold (the horizontal red line) via the `--minor-threshold` flag.
+
+![](.gitbook/assets/minor_contamination.html)
+
+### Algorithm details
+
+Minor contamination is calculated as the average minor allele frequency for homozygous sites. A homozygous site is defined as one with < 10% minor allele frequency.
diff --git a/docs/readme.rst b/docs/readme.rst
deleted file mode 100644
index 72a3355..0000000
--- a/docs/readme.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. include:: ../README.rst
diff --git a/docs/sex-mismatch.md b/docs/sex-mismatch.md
new file mode 100644
index 0000000..5bbea24
--- /dev/null
+++ b/docs/sex-mismatch.md
@@ -0,0 +1,34 @@
+---
+description: Determine if a sample's predicted and known sex mismatch.
+---
+
+# Sample sex verification
+This tool uses read coverage data on the Y chromosome to predict the sex for a sample, and then compares the prediction to the expected sex to see if there is a mismatch. The metric requires the extracted coverage information from running the `extract` tool with the `--bed` flag supplied.
+
+### How to run the tool
+You can run this tool with one or more samples. At a minimum, the two required inputs are the list of sample names and the database (biometrics will automatically load all sample data from the database). Below is an example command:
+
+```shell
+biometrics sexmismatch \
+ -sn C-48665L-N001-d C-PCYP90-N001-d C-MH6AL9-N001-d \
+ -db /path/to/store/extract/output
+```
+
+You can also indicate your input samples via a CSV file, which has the same format as what you provided to the extraction tool, but you only need the `sample_name` column:
+
+```shell
+biometrics sexmismatch \
+ -i samples.csv \
+ -db /path/to/store/extract/output
+```
+
+### Output
+
+All analyses output a CSV file containing the metrics for each sample. It will be saved either to the current working directory or to a folder you specify via `--outdir`. The table below describes each column in the CSV output.
+
+| Column Name | Description |
+| :--- | :--- |
+| sample_name | Sample name. |
+| expected_sex | The sample's expected sex. |
+| predicted_sex | The sample's predicted sex. |
+| sex_mismatch | True if expected and predicted sex mismatch. |
diff --git a/docs/usage.rst b/docs/usage.rst
deleted file mode 100644
index e8c6f17..0000000
--- a/docs/usage.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=====
-Usage
-=====
-
-To use biometrics in a project::
-
- import biometrics
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ba58264
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+biometrics==0.1.1
+numpy==1.19.5
+pandas==1.2.0
+plotly==4.14.1
+pysam==0.16.0.1
+python-dateutil==2.8.1
+pytz==2020.5
+PyVCF==0.6.8
+retrying==1.3.3
+six==1.15.0
diff --git a/setup.cfg b/setup.cfg
index 2a71a62..2f43021 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.1.0
+current_version = 0.1.1
commit = True
tag = True
@@ -19,4 +19,3 @@ exclude = docs
[aliases]
# Define setup.py command aliases here
-
diff --git a/setup.py b/setup.py
index 4ccdb56..2130cf4 100644
--- a/setup.py
+++ b/setup.py
@@ -4,24 +4,31 @@
from setuptools import setup, find_packages
-with open('README.rst') as readme_file:
+with open('README.md') as readme_file:
readme = readme_file.read()
with open('HISTORY.rst') as history_file:
history = history_file.read()
-requirements = ['Click>=7.0', ]
-setup_requirements = [ ]
+def req_file(filename):
+ """
+ We're using a requirements.txt file so that pyup.io can use this for security checks
+ :param filename:
+ :return str:
+ """
+ with open(filename) as f:
+ content = f.readlines()
+ content = filter(lambda x: not x.startswith("#"), content)
+ return [x.strip() for x in content]
-test_requirements = [ ]
setup(
author="Ronak Shah",
author_email='rons.shah@gmail.com',
python_requires='>=3.5',
classifiers=[
- 'Development Status :: 2 - Pre-Alpha',
+ 'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Natural Language :: English',
@@ -37,17 +44,18 @@
'biometrics=biometrics.cli:main',
],
},
- install_requires=requirements,
+ install_requires=req_file("requirements.txt"),
license="Apache Software License 2.0",
long_description=readme + '\n\n' + history,
include_package_data=True,
keywords='biometrics',
name='biometrics',
packages=find_packages(include=['biometrics', 'biometrics.*']),
- setup_requires=setup_requirements,
+ package_data={
+ "": ['requirements.txt', 'requirements_dev.txt'],
+ },
test_suite='tests',
- tests_require=test_requirements,
- url='https://github.com/rhshah/biometrics',
- version='0.1.0',
+ url='https://github.com/msk-access/biometrics',
+ version='0.1.1',
zip_safe=False,
)
diff --git a/tests/test_biometrics.py b/tests/test_biometrics.py
index 45e45cd..9a427b8 100644
--- a/tests/test_biometrics.py
+++ b/tests/test_biometrics.py
@@ -3,31 +3,125 @@
"""Tests for `biometrics` package."""
-import unittest
-from click.testing import CliRunner
+import os
+import argparse
+from unittest import TestCase
+from unittest import mock
-from biometrics import biometrics
-from biometrics import cli
+from biometrics.biometrics import get_samples, run_minor_contamination, run_major_contamination
+from biometrics.cli import get_args
+from biometrics.extract import Extract
+from biometrics.genotype import Genotyper
+from biometrics.sex_mismatch import SexMismatch
+CUR_DIR = os.path.dirname(os.path.abspath(__file__))
-class TestBiometrics(unittest.TestCase):
+
+class TestBiometrics(TestCase):
"""Tests for `biometrics` package."""
- def setUp(self):
+ @mock.patch(
+ 'argparse.ArgumentParser.parse_args',
+ return_value=argparse.Namespace(
+ subparser_name='extract',
+ input=None,
+ sample_bam=[
+ os.path.join(CUR_DIR, 'test_data/test_sample1_golden.bam'),
+ os.path.join(CUR_DIR, 'test_data/test_sample2_golden.bam')],
+ sample_name=['test_sample1', 'test_sample2'],
+ sample_type=['tumor', 'tumor'],
+ sample_group=['patient1', 'patient1'],
+ sample_sex=['M', 'M'],
+ database=os.path.join(CUR_DIR, 'test_data/'),
+ vcf=os.path.join(CUR_DIR, 'test_data/test.vcf'),
+ fafile=os.path.join(CUR_DIR, 'test_data/ref.fasta'),
+ bed=os.path.join(CUR_DIR, 'test_data/test.bed'),
+ min_mapping_quality=1,
+ min_base_quality=1,
+ min_coverage=10,
+ minor_threshold=0.002,
+ major_threshold=0.6,
+ discordance_threshold=0.05,
+ coverage_threshold=50,
+ zmin=None,
+ zmax=None,
+ outdir='.',
+ json=None,
+ plot=True,
+ default_genotype=None,
+ overwrite=True,
+ no_db_compare=False,
+ threads=1))
+ def setUp(self, mock_args):
"""Set up test fixtures, if any."""
- def tearDown(self):
- """Tear down test fixtures, if any."""
-
- def test_000_something(self):
- """Test something."""
-
- def test_command_line_interface(self):
- """Test the CLI."""
- runner = CliRunner()
- result = runner.invoke(cli.main)
- assert result.exit_code == 0
- assert 'biometrics.cli.main' in result.output
- help_result = runner.invoke(cli.main, ['--help'])
- assert help_result.exit_code == 0
- assert '--help Show this message and exit.' in help_result.output
+ self.args = get_args()
+
+ def test_load_vcf(self):
+ """Test loading the VCF file."""
+
+ extractor = Extract(self.args)
+
+ self.assertGreater(
+ len(extractor.sites), 0, msg="Could not parse VCF sites.")
+ self.assertEqual(
+ len(extractor.sites), 15,
+ msg="Did not parse right number of sites.")
+
+ def test_load_bed(self):
+ """Test loading the BED file."""
+
+ extractor = Extract(self.args)
+
+ self.assertEqual(
+ len(extractor.regions), 1, msg="Expected 1 region in BED file.")
+
+ def test_extract_sample(self):
+
+ extractor = Extract(self.args)
+ samples = get_samples(self.args, extraction_mode=True)
+ samples = extractor.extract(samples)
+
+ self.assertEqual(len(samples), 2, msg='Did not load 2 samples.')
+ self.assertEqual(samples['test_sample1'].sample_name, 'test_sample1', msg='Sample was not loaded correctly.')
+ self.assertIsNotNone(samples['test_sample1'].pileup, msg='Sample pileup was not loaded correctly.')
+ self.assertEqual(samples['test_sample1'].pileup.shape[0], 15, msg='Did not find pileup for 4 variants. Found: {}.'.format(samples['test_sample1'].pileup))
+
+ def test_sample_minor_contamination(self):
+ samples = get_samples(self.args, extraction_mode=False)
+ samples = run_minor_contamination(self.args, samples)
+
+ self.assertAlmostEqual(
+ samples['test_sample1'].metrics['minor_contamination'], 0.0043,
+ places=4, msg='Minor contamination is wrong.')
+
+ def test_sample_major_contamination(self):
+ samples = get_samples(self.args, extraction_mode=False)
+ samples = run_major_contamination(self.args, samples)
+
+ self.assertAlmostEqual(
+ samples['test_sample1'].metrics['major_contamination'], 0.2,
+ places=1, msg='Major contamination is wrong.')
+
+ def test_genotyper(self):
+ samples = get_samples(self.args, extraction_mode=False)
+
+ genotyper = Genotyper(
+ no_db_compare=self.args.no_db_compare,
+ discordance_threshold=self.args.discordance_threshold,
+ threads=self.args.threads,
+ zmin=self.args.zmin,
+ zmax=self.args.zmax)
+ data = genotyper.genotype(samples)
+
+ self.assertEqual(len(data), 4, msg='There were not four comparisons done.')
+ self.assertEqual(set(data['Status']), set(['Expected Match']), msg='All sample comparisons were expected to match.')
+
+ def test_sexmismatch(self):
+ samples = get_samples(self.args, extraction_mode=False)
+
+ sex_mismatch = SexMismatch(self.args.coverage_threshold)
+ results = sex_mismatch.detect_mismatch(samples)
+
+ self.assertEqual(set(results['expected_sex']), set(['M']), msg='Expected all samples to have an expected sex of M.')
+ self.assertEqual(set(results['predicted_sex']), set(['M']), msg='Expected all samples to not have a sex mismatch.')
diff --git a/tests/test_data/main.sh b/tests/test_data/main.sh
new file mode 100644
index 0000000..3c864e1
--- /dev/null
+++ b/tests/test_data/main.sh
@@ -0,0 +1,100 @@
+# simulate BAM and FASTQ data from reference fasta and vcf file
+
+python2 /Users/charlesmurphy/Desktop/tools/neat-genreads/genReads.py \
+ -r ref.fasta \
+ -R 50 \
+ -o test_sample1 \
+ -c 30 \
+ -p 12 \
+ -v test.vcf \
+ --bam \
+ -M 0 \
+ -E 0 \
+ --pe 120 5
+~/Desktop/tools/samtools-1.10/samtools index test_sample1_golden.bam
+
+python2 /Users/charlesmurphy/Desktop/tools/neat-genreads/genReads.py \
+ -r ref.fasta \
+ -R 50 \
+ -o test_sample2 \
+ -c 30 \
+ -p 12 \
+ -v test.vcf \
+ --bam \
+ -M 0 \
+ -E 0 \
+ --pe 120 5
+~/Desktop/tools/samtools-1.10/samtools index test_sample2_golden.bam
+
+# python biometrics/cli.py extract -sb ./tests/test_data/test_golden.bam -st tumor -ss male -sp P1 -sn test --vcf tests/test_data/test.vcf -db . --fafile ./tests/test_data/ref.fasta --overwrite
+#
+# python biometrics/cli.py minor -sb ./tests/test_data/test_golden.bam -st tumor -ss male -sp P1 -sn test --vcf tests/test_data/test.vcf -db . --fafile ./tests/test_data/ref.fasta
+#
+# python biometrics/cli.py genotype \
+# -sb ./tests/test_data/test_sample1_golden.bam ./tests/test_data/test_sample2_golden.bam \
+# -st tumor tumor -ss male male -sg patient1 patient1 -sn test_sample1 test_sample2 \
+# --vcf tests/test_data/test.vcf -db . --fafile ./tests/test_data/ref.fasta
+#
+#
+# python biometrics/cli.py extract \
+# -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+# -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N001-d \
+# --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+# -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/ \
+# --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+# --overwrite
+#
+biometrics extract \
+ -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N002-d \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ --bed /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-A.sorted.bed \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+ --overwrite
+#
+biometrics genotype \
+ -sb \
+ /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor tumor -ss male male -sg C-52YNHF C-52YNHF -sn C-52YNHF-N001-d C-52YNHF-N002-d \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+ --outdir ~/Desktop --json --plot
+
+biometrics major \
+ -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N001-d \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+ --outdir ~/Desktop --json
+
+biometrics minor \
+ -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N001-d \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+ --outdir ~/Desktop --json
+
+biometrics sexmismatch \
+ -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N001-d \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ --bed /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-A.sorted.bed \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta \
+ --outdir ~/Desktop --json
+
+
+for i in `seq 1 1000`
+do
+ python ../../biometrics/cli.py extract \
+ -sb /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR.bam \
+ -st tumor -ss male -sg C-52YNHF -sn C-52YNHF-N002-d-$i \
+ --vcf /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/fingerprinting_snps.vcf \
+ -db /Users/charlesmurphy/Desktop/mskcc-analyses/201013_fingerprinting/db3/ \
+ --fafile /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta
+done
diff --git a/tests/test_data/ref.fasta b/tests/test_data/ref.fasta
new file mode 100644
index 0000000..811c4e5
--- /dev/null
+++ b/tests/test_data/ref.fasta
@@ -0,0 +1,84 @@
+>1
+ACGATCAATAGAGTGAAAAAGATAACCTCTTGAATGGGAGAAATATTTGCAAACTACTCATCCAACCGGGGATTGATATC
+CAGAATATACAAGTAACACAAATATGTCAAAAGTAAAATAAATAAATAAATAAATAAATAAATAAATTAAATAAATTATT
+TAAAAATCGGCAGAGGACAGGAATAGACATTTCTCAGGAGACAACATACAAAGGGCCACAGATACATCAAAAAATGCTCA
+ACATCACTATTTGTCAGGGAAGTACTAATTAAAACCAAAATGAGATGTCCCCTCAAACCTGTTAGAATGGCTATTATCAA
+AAAGATGAAAGATAGCAACTATCAGAGAGGATGATAGAAAAGGGAACCCTTGCATCATGTACAAATTAAAAATAGAACTA
+TCACATGATCCAAGAATCCTACTTCTGGGTATATAGCCAAAGGAATTGAAATCAATATGTCAAAGGGATATCTGCACTCC
+TATGTTATTGCAGCATGTTCACAATGGCCAAGATATAGAATCAACCTAACTGTTCATAGACAGATGAATGGATAAATGAA
+ATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAGGAAATTCTGTCATTTGAGACAACGTGGATGAACCTAGAGGAC
+ATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGACAAATATTGCATGATCTCACTTAGAATCTAAAAAATCTGAACTCA
+TAGAAGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGGCAGGGAGAGGATGAGGAAATGGGACATTGTTAATAAAAG
+GAAAAAAATTCAATTAGTAGGATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATTAATAATGTATTGTATG
+CTTGAAAATTGCTAATACAGTATATTGTAAATGTTAATATGAGGTAATATATGTGTTAATTAACTTGATTTATTCATTCA
+ACAACATACACATATATTAAAACATCACACTGTATTCCACAAATATATATAATTTTTGTCAATTAAAAAATAATTTTTAA
+AAATGAGAAACAAAAAAGCTGACATTTTCAGATTAAAAAAATTATACAGAAGAATTAATTCATTAAAGTAAAAACAAATG
+TGGGAAAATGGTTTTTAAATATAATTTAAACCAAATTTAAAATAAGCATATAAAGACTATGGACAAAACAAGAAATCCAA
+ATAAAAAATAAACATATGAAGAATATTCAAACTCACTTTTTATCAAAGAAATGTAAATTTTAAAATATAGCATTGCTATT
+GTGTTTTCATAAATAATAATATATCATGGATGAGCCTGTGAGGAAACAGACACTCATACTCTGCAAAGCAATGACTAAGA
+TAATTATGTCAGATCATGAATTACGTTAATTAGCTTGATGGTGGTCACTGTTTCACGATAAATATACATATGTATCAAAA
+CATCACATTACACACCATAAAGATATATAACTTGTTATCAAAAAGAAATATAGCAGTTAAAATTTAAAATTTTTAAAAAA
+CGTCTTTTTGAGGTTCGTACCTCACTTAAGTCACACTGTTCAAAATATTCATGCACTCATTTCTCTCATTCATGTGTTAA
+TGTACAGGGTACGGGCCACTATAAATTCCTTCAGCAACTGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTTGTATTA
+ACTTTTTTTTTTTTTTGAGCAGCAGCAAGATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTGGAAGGGGACC
+CGAGCGGTTTGCCCAGTTGTATTAACTTCTAATTCAACACTTTAAGATTCTTAGCATTATTGCAGACAACATCAGCTTCA
+CAAGTGTGTGTCCTGTGCAGTTGAACAAGATCCCACACTTAAAAGGATCCTACACTTTTTTTAATGCTCTGCTGTTTCTG
+CCTTGAAATTCTTAACAATTTTTTTAACCAAAGTCCTCACAAATTCAGTTTACATTAGCCCTGCAATCATGTAGACATCC
+TGATTCCAGACAATGTGTCTGGAGGCAGGGTTTACAGGACTTCAAGAACCTTACCTTCTCAACTTTCATCTGCATCTTTA
+CTCCCAACTATATATGAAGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCCTCTTCCTCTGCCAAACTTCCTT
+GATCTAGGATAAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAAGTAAATATTATAGGCTCTACAGGCCCTAGAGTGTCT
+GTCATAACTACTCAACTCTTATTGTAGCATAAAAACTGTCAACAGACAATACAGAAACAAATGAGTGTGACTGGGTTCCA
+GTGAAACTTTATTTACAAAAGATTTGTCCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAACAGTTTTGAGCCCTT
+TTATTTTGCTCAACAGTTAAGCATGGCTCCATGTCCCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGGT
+TACACGAAGACATACATTCATTCATTTATACAACACATTTTCAGTGTTGAATGATAAATTTTGGAATAGTTAACAGATGA
+TAAAAGTGTTGTTTTCAGTCATCCCTATCCAATGAAGTAAAAAAAAAAGTGTTGAATGGGAAGAAATCAAGAATAGTTAT
+ACGAATATCACCATTGCATTAAAGCTCTCTTCCTTGTTTCTAAAAGAATATCTTGACACACATTAAGCTCACTGACCCCC
+ACACCATGAATGAGGGCATCTTCAACAATGGTGGATGACGTCTTAGTTTCCCTCAACTCAGTTAATCTAAGTAAGCTCAT
+GGTATCACTTTCCTGTCCTAGAGGGAACATATTTCCTGCATTTTTCTTTTTTTCCTTACTTTCCATCACCAAGTAACTCT
+TCTGATATTTTTTCTCTTGAGAAAATTAATATGACTCATAGATCTGGTTCCCAAGAGAAATCAATGGAGGCCTGGTTACA
+AGGATCTAAGAAGCATCAATGGGTCACTAACATCTAGTGGTACTAATTAACTCTGTTAATCATTGGGAAGAAAATGTATA
+TATACTTTTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTTCCCAGTGTCACTACTGCACACCC
+TGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTCAAGGCAATCTCTCTCTTCTTACATTTCCCT
+TCAACCTTCTATCTCCTCCACTTTTCTGTTTTCCTCCTATCTCCAATTATTTCAATCCTCAGAGCATTATTCTTACAATC
+>Y
+ACGATCAATAGAGTGAAAAAGATAACCTCTTGAATGGGAGAAATATTTGCAAACTACTCATCCAACCGGGGATTGATATC
+CAGAATATACAAGTAACACAAATATGTCAAAAGTAAAATAAATAAATAAATAAATAAATAAATAAATTAAATAAATTATT
+TAAAAATCGGCAGAGGACAGGAATAGACATTTCTCAGGAGACAACATACAAAGGGCCACAGATACATCAAAAAATGCTCA
+ACATCACTATTTGTCAGGGAAGTACTAATTAAAACCAAAATGAGATGTCCCCTCAAACCTGTTAGAATGGCTATTATCAA
+AAAGATGAAAGATAGCAACTATCAGAGAGGATGATAGAAAAGGGAACCCTTGCATCATGTACAAATTAAAAATAGAACTA
+TCACATGATCCAAGAATCCTACTTCTGGGTATATAGCCAAAGGAATTGAAATCAATATGTCAAAGGGATATCTGCACTCC
+TATGTTATTGCAGCATGTTCACAATGGCCAAGATATAGAATCAACCTAACTGTTCATAGACAGATGAATGGATAAATGAA
+ATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAGGAAATTCTGTCATTTGAGACAACGTGGATGAACCTAGAGGAC
+ATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGACAAATATTGCATGATCTCACTTAGAATCTAAAAAATCTGAACTCA
+TAGAAGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGGCAGGGAGAGGATGAGGAAATGGGACATTGTTAATAAAAG
+GAAAAAAATTCAATTAGTAGGATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATTAATAATGTATTGTATG
+CTTGAAAATTGCTAATACAGTATATTGTAAATGTTAATATGAGGTAATATATGTGTTAATTAACTTGATTTATTCATTCA
+ACAACATACACATATATTAAAACATCACACTGTATTCCACAAATATATATAATTTTTGTCAATTAAAAAATAATTTTTAA
+AAATGAGAAACAAAAAAGCTGACATTTTCAGATTAAAAAAATTATACAGAAGAATTAATTCATTAAAGTAAAAACAAATG
+TGGGAAAATGGTTTTTAAATATAATTTAAACCAAATTTAAAATAAGCATATAAAGACTATGGACAAAACAAGAAATCCAA
+ATAAAAAATAAACATATGAAGAATATTCAAACTCACTTTTTATCAAAGAAATGTAAATTTTAAAATATAGCATTGCTATT
+GTGTTTTCATAAATAATAATATATCATGGATGAGCCTGTGAGGAAACAGACACTCATACTCTGCAAAGCAATGACTAAGA
+TAATTATGTCAGATCATGAATTACGTTAATTAGCTTGATGGTGGTCACTGTTTCACGATAAATATACATATGTATCAAAA
+CATCACATTACACACCATAAAGATATATAACTTGTTATCAAAAAGAAATATAGCAGTTAAAATTTAAAATTTTTAAAAAA
+CGTCTTTTTGAGGTTCGTACCTCACTTAAGTCACACTGTTCAAAATATTCATGCACTCATTTCTCTCATTCATGTGTTAA
+TGTACAGGGTACGGGCCACTATAAATTCCTTCAGCAACTGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTTGTATTA
+ACTTTTTTTTTTTTTTGAGCAGCAGCAAGATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTGGAAGGGGACC
+CGAGCGGTTTGCCCAGTTGTATTAACTTCTAATTCAACACTTTAAGATTCTTAGCATTATTGCAGACAACATCAGCTTCA
+CAAGTGTGTGTCCTGTGCAGTTGAACAAGATCCCACACTTAAAAGGATCCTACACTTTTTTTAATGCTCTGCTGTTTCTG
+CCTTGAAATTCTTAACAATTTTTTTAACCAAAGTCCTCACAAATTCAGTTTACATTAGCCCTGCAATCATGTAGACATCC
+TGATTCCAGACAATGTGTCTGGAGGCAGGGTTTACAGGACTTCAAGAACCTTACCTTCTCAACTTTCATCTGCATCTTTA
+CTCCCAACTATATATGAAGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCCTCTTCCTCTGCCAAACTTCCTT
+GATCTAGGATAAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAAGTAAATATTATAGGCTCTACAGGCCCTAGAGTGTCT
+GTCATAACTACTCAACTCTTATTGTAGCATAAAAACTGTCAACAGACAATACAGAAACAAATGAGTGTGACTGGGTTCCA
+GTGAAACTTTATTTACAAAAGATTTGTCCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAACAGTTTTGAGCCCTT
+TTATTTTGCTCAACAGTTAAGCATGGCTCCATGTCCCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGGT
+TACACGAAGACATACATTCATTCATTTATACAACACATTTTCAGTGTTGAATGATAAATTTTGGAATAGTTAACAGATGA
+TAAAAGTGTTGTTTTCAGTCATCCCTATCCAATGAAGTAAAAAAAAAAGTGTTGAATGGGAAGAAATCAAGAATAGTTAT
+ACGAATATCACCATTGCATTAAAGCTCTCTTCCTTGTTTCTAAAAGAATATCTTGACACACATTAAGCTCACTGACCCCC
+ACACCATGAATGAGGGCATCTTCAACAATGGTGGATGACGTCTTAGTTTCCCTCAACTCAGTTAATCTAAGTAAGCTCAT
+GGTATCACTTTCCTGTCCTAGAGGGAACATATTTCCTGCATTTTTCTTTTTTTCCTTACTTTCCATCACCAAGTAACTCT
+TCTGATATTTTTTCTCTTGAGAAAATTAATATGACTCATAGATCTGGTTCCCAAGAGAAATCAATGGAGGCCTGGTTACA
+AGGATCTAAGAAGCATCAATGGGTCACTAACATCTAGTGGTACTAATTAACTCTGTTAATCATTGGGAAGAAAATGTATA
+TATACTTTTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTTCCCAGTGTCACTACTGCACACCC
+TGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTCAAGGCAATCTCTCTCTTCTTACATTTCCCT
+TCAACCTTCTATCTCCTCCACTTTTCTGTTTTCCTCCTATCTCCAATTATTTCAATCCTCAGAGCATTATTCTTACAATC
\ No newline at end of file
diff --git a/tests/test_data/ref.fasta.fai b/tests/test_data/ref.fasta.fai
new file mode 100644
index 0000000..8e6d071
--- /dev/null
+++ b/tests/test_data/ref.fasta.fai
@@ -0,0 +1,2 @@
+1 3280 3 80 81
+Y 3280 3327 80 81
diff --git a/tests/test_data/test.bed b/tests/test_data/test.bed
new file mode 100644
index 0000000..3bf9bcf
--- /dev/null
+++ b/tests/test_data/test.bed
@@ -0,0 +1 @@
+Y 1 200
\ No newline at end of file
diff --git a/tests/test_data/test.vcf b/tests/test_data/test.vcf
new file mode 100644
index 0000000..eb85ca3
--- /dev/null
+++ b/tests/test_data/test.vcf
@@ -0,0 +1,18 @@
+##fileformat=VCFv4.1
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1
+1 50 . C G . PASS . GT 1/1/1/1/1/1/0/0/0/0/0/0
+1 250 . T A . PASS . GT 1/1/1/1/1/1/0/0/0/0/0/0
+1 400 . A G . PASS . GT 1/1/1/1/1/1/0/0/0/0/0/0
+1 500 . C G . PASS . GT 1/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0
+1 600 . A C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 700 . A C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 800 . G C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 900 . G C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1000 . C T . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1100 . T C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1200 . A C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1300 . T C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1400 . G C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1500 . A C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
+1 1600 . A C . PASS . GT 1/1/1/1/1/1/1/1/1/1/1/1
\ No newline at end of file
diff --git a/tests/test_data/test_sample1_golden.bam b/tests/test_data/test_sample1_golden.bam
new file mode 100644
index 0000000..82982bc
Binary files /dev/null and b/tests/test_data/test_sample1_golden.bam differ
diff --git a/tests/test_data/test_sample1_golden.bam.bai b/tests/test_data/test_sample1_golden.bam.bai
new file mode 100644
index 0000000..f306258
Binary files /dev/null and b/tests/test_data/test_sample1_golden.bam.bai differ
diff --git a/tests/test_data/test_sample1_read1.fq b/tests/test_data/test_sample1_read1.fq
new file mode 100644
index 0000000..8bf35c6
--- /dev/null
+++ b/tests/test_data/test_sample1_read1.fq
@@ -0,0 +1,7932 @@
+@test_sample1-1-1/1
+AAAAAAAGTGTTGAATGGGAAGAAATCAAGAATAGTTATACGAATATCAC
++
+GGGCGGGFCGE.GGGGBGGGG/?EGGE?GGGG9&FDBEGG@GGECCD>@FGGAEG
+@test_sample1-1-5/1
+TATTTGTCAGGGAAGTACTAATTAAAACCAAAATGAGATGTCCCCTCAAA
++
+GGFEEGGFCGGGDGF
+@test_sample1-1-15/1
+ATGTGTTACTGTACAGGGTACGGGCCACTATAAATTCCTTCAGCAACTGG
++
+D+G6AGFEGGA>FFCGGGCG5DEGFFGGFEFG:G>BGFGGFGGGGGDGEA
+@test_sample1-1-17/1
+GCTACAATAAGAGTTGAGTAGTTATGACAGACACTCTAGGGCCTGTAGAG
++
+GGGF6GGGGGGGGGDGFGFFGG8EFFCGCFFFGGGG#GFFBGGFA>FGG7
+@test_sample1-1-19/1
+GATAGAAAAGGGAACCCTTGCATCATGTACAAATTAAAAATAGAACTATC
++
+GFGGG@A>FG6B:GGDFE'EGEFA9FAGGED<
+@test_sample1-1-21/1
+TACAGTCAACATGTCGTATATTGGGTCCCCTGAATGTAATCCTACTAATT
++
+G..GFGEGE:GGGGEGG;EGEG2F@GFB<FGGGGEE8FGG6GCEA45?
+@test_sample1-1-25/1
+AATTTATTTATTTATTTATTTATTTATTTATTTTACTTTTGACATATTTG
++
+F8GG@FCGFFGGGF)FGF.FFCGGFGGGGGGGFCAGGBFGG6=CFFCEBG
+@test_sample1-1-27/1
+TAGAGGACATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGACAAATAT
++
+3DGGG,EGGFFDGFFFGFG9GCE>CGDGG%EAGGGE3FG-DB/G
+@test_sample1-1-29/1
+GTTAACTATTCCAAAATTTATCATTCAACACTGAAAATGTGTTGTATAAA
++
+GFFG99CG9,GGG;FFFGFGEBF3GFCFDGFBGF7BGF?FFFFFF;F0?B
+@test_sample1-1-31/1
+AGAGGGAACATATTTCCTGCATTTTTCTTTTTTTCCTTACTTTCCATCAC
++
+EEGCGG(E:GGGGDF>FGFGGDDGGEGGDDGFGGGGFGGGG<1>D@;GGE
+@test_sample1-1-33/1
+CTTTGATAAAAAGTGAGTTTGAATATTCTTCATATGTTTATTTTTTATGT
++
+GGBFGG@GGB*GG?@GFGGGGGGF?BDGD
+@test_sample1-1-37/1
+TATGCTTGAAAATTGCTAATACACTATATTGTAAATGTTAATATGAGGTA
++
+BAGGGBFEFGGCF:CGGDGEGDBGEGDGGGGCGFFAGGG8EFGGGGGCCA
+@test_sample1-1-39/1
+GCTATCTTTCATCTTTTTGATAATAGCCATTCTAACAGGTTTGAGGGGAC
++
+CG5FB0GFGGFG=GGGGGDFGGDGEGGGGGGGFGGEDG@AG6G?F,8?FD
+@test_sample1-1-41/1
+TCTACATGATTGCAGGGCTAATGTAAACTGAATTTGTGAGGACTTTGGTT
++
+FGEFGGF=GGE>GG?GGGAEGFGGG1GEFFGFGF?EGFCDEADGG6GG@)
+@test_sample1-1-43/1
+AAATTATATTTAAAAACCATTTTCCCACATTTGTTTTTACTTTAATGGAT
++
+BFEGFEGGFGGGGFGEDGDGFGGEGFGGGFGGEFGGGGGFF?EG7GD?GEGGG1GGDD7GGCGFGAGFCGE=FGFEFE.CC
+@test_sample1-1-49/1
+ATGTGTATGTTGTTGAATGAATAAATCAAGTTAATTAACACATATATTAC
++
+GGGFFGGGCG??GGBDGDGFEGFGGGBGFF8F;FEG:F3AG?9BFG3GGGFGFGGFCAE7?9DE$
+@test_sample1-1-53/1
+GAACCTAGAGGACATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGACA
++
+>GFGGGGFEE:GFGGGGGGBFA:FE1FD3G=GGGGGFGFG@FGFGEB@EE
+@test_sample1-1-55/1
+CACCATCCATATCTATCTTCATCATCTTCATATATAGTTGGGAGTAAAGA
++
+FCFF>GGGGEGGGGGGFFFGGGFEGG,GGFFGGC3GFGCGGDGAGGDGGF
+@test_sample1-1-57/1
+TCTTCATATGTTTATTTTTTATGTGGATTTCTTGTTTTGTCCATAGTCTT
++
+GGGFG>GFGGGFFG87A8EFBGGGFGGFFGFF@+GGGFFGECD7:DEBGF
+@test_sample1-1-59/1
+CATGTAGACATCCTGATTCCAGACAATGTGTCTGGAGGCAGGGTTTACAG
++
+DF:EGF>9FC
+@test_sample1-1-63/1
+AAATGTATATATACTTTTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAA
++
+GGGEFGGDGE?GGGFGFAF3FEEB6CG6FGGFG1,G9FGGFGF8AGGCAGEGF=0-DDGFE?<7BF
+@test_sample1-1-73/1
+TTATGCTACAATAAGAGTTGAGTAGTTATGACAGACACTCTAGGGCCTGT
++
+EFGFFGGFF@GGFGDB9F:GCEFGGGCBDFAGGEFGG2
+@test_sample1-1-75/1
+GATCTAGGATAAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAAGTAAAT
++
+GDGGEEDFF=GGFG@GBG"GGEGEFEDGGFFFD(GGCEFG>GGG
+@test_sample1-1-83/1
+TAAAAATCGGCAGAGGACAGGAATAGACATTTCTCAGGAGACAACATACA
++
+EFFGGGEGDGGGFGGCDGGFGGAFGCFFG@:>EFDGGFFFG@GFA=FFGFFGDGGGGEFFGG;DC=FGFD
+@test_sample1-1-91/1
+TCAAAAAGACGTTTTTTAAAAATTTTAAATTGTAACTGCTATATTTCTTT
++
+AED5GGFFGGF2;GGGGGGGAGCGD@EGFGFFBGGGB>C1D*GEF
+@test_sample1-1-95/1
+TATGGTGTGTAATGTGATGTTTTGATACATATGTATATTTATCGTGAAAC
++
+GFGFCFDGGED2DFFFAG4G@GGFGGFGGFA>)EEGF)&+=DGGGGGGGGEEGG=FCFGGDE:G;F.0?DGGGFBGGFG8GF9GFGD?ED9GGGF
+@test_sample1-1-103/1
+TTCTCTCATTCATGTGTTACTGTACAGGGTACGGGCCACTATAAATTCCT
++
+GGGG@G;GGEFGG7BFG2GGGFEFGGGFGDGG3FEB;EFFFC=D@6E=CG
+@test_sample1-1-105/1
+TTAAAAATAGAACTATCACATGATCCAAGAATCCTACTTCTGGGTATATA
++
+GGGGGE@FCG9CGDFFAFGGGFGFEEGGGGGG>AGCGGE9GEBB9GEE2=
+@test_sample1-1-107/1
+ATAAAAAGTGAGTTTGAATATTCTTCATATGTTTATTTTTTATGTGGATT
++
+?F@G@:CGGFGDFFFGGFG9GG*GGB@AG*@@DGF0FF//2FE6)2=FG/
+@test_sample1-1-109/1
+AATACACTATATTGTAAATGTTAATATGAGGTAATATATGTGTTAATTAA
++
+A6GGG>DFGGG1G@F?GBFGGGA.GGFCG+GGCA,')%8CGGG
+@test_sample1-1-113/1
+CAATGGAGGCCTGGTTACAAGGATCTAAGAAGCATCAATGGGTCACTAAC
++
+GFGEAGFFCFEDFGE>DGFGFDEGFGFFG>DG8GCEFGGDFAFG%$F=FF
+@test_sample1-1-115/1
+CATACAATACATTATTAATTACAGTCAACATGTCGTATATTGGGTCCCCT
++
+A@CGEBGFGGGDD@GCFF;DGGFCGDGGGEDDEFGCFFG8FCFC;FG<@C
+@test_sample1-1-117/1
+CTGCCAGATAACCCTAGTAACCATCATACTATTCTCTGCTTCTATGAGTT
++
+GCCGG9FGGGGGGEAADCGFGADGGGGECAD#GGCGBBGGFDEDFEFDFEF<
+@test_sample1-1-121/1
+TATTTGCAAACTACTCATCCAACCGGGGATTGATATCCAGAATATACAAG
++
+GDDGGCG?GFGGCGGDFGGF>G?GGGFGGF06-GFGFFGDGGFGGFGCGG
+@test_sample1-1-123/1
+CAACACTTTTTTTTTTACTTCATTGGATAGGGATGACTGAAAACAACACT
++
+G4GGG8GGEFFCD4GFAAEGC7BGGGGGEGF@GG;?GGFGFF?@F7BEGF
+@test_sample1-1-125/1
+TGGAATAGTTAACAGATGATAAAAGTGTTGTTTTCAGTCATCCCTATCCA
++
+FEGGG=GGGGECGCG8GDEGGDFGDE@AFGGF
+@test_sample1-1-127/1
+AAAGAAATGTAAATTTTAAAATATAGCATTGCTATTGTGTTTTCATAAAT
++
+GGGCDFGB64FDBFFFGGBBGE.G82?FFGFGGGGGGGGGG<@F1;>B0G
+@test_sample1-1-131/1
+GAAATCAATGGAGGCCTGGTTACAAGGATCTAAGAAGCATCAATGGGTCA
++
+GG@FFGG7EGEEG?FGDEDDGEEFA$EFCFFC0GG>GBGEGGGEEG=GCF>@GBFGC>FFGFGFAB@E@FAFBGG
+@test_sample1-1-143/1
+CAGACACATTGTCTGGAATCAGGATGTCTACATGATTGCAGGGCTAATGT
++
+DGGF.GGGFFGGGG7-FG$/DFGGG
+@test_sample1-1-145/1
+AACCTTACCTTCTCAACTTTCATCTGCATCTTTACTCCCAACTATATATG
++
+GGGEEGGGG2GBFBGGGGG@:EGGGGDGGE.;GG;GDFGDFGGBE:25FC
+@test_sample1-1-147/1
+GAAACAAATGAGTGTGACTGGGTTCCAGTGAAACTTTATTTACAAAAGAT
++
+GFEGG9FFGGGGFGGDGGGA:EEF?4GGFFA8FCGGGGFD7GE090AFGB
+@test_sample1-1-149/1
+GAATTTGTGAGGACTTTGGTTAAAAAAATTGTTAAGAATTTCAAGGCAGA
++
+GDA?FF6GFGFDFGGGGEGGG@1GGDGGGGEFG?GGGD8<-3BGDFGFFE
+@test_sample1-1-151/1
+TGAGCTTACTTAGATTAACTGAGTTGAGGGAAACTAAGACGTCATCCACC
++
+GDGGGG=GFD)FEGEGCGACGGEG;:CDCGFGGGGEGGGGGG0FECEFFF
+@test_sample1-1-153/1
+ATGCTCTGAGGATTGAAATAATTGGAGATAGGAGGAAAACAGAAAAGTGG
++
+EGF4GGBFDGGCB7FGEB?F;GGG;EGEF@C;FFBBEF@CAEGGC@GGB?
+@test_sample1-1-155/1
+TGGATAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAG
++
+F1GFDGFG@GAFGCCFFD53GGFFBFGCAF?GB?CAFGGGG+E0FE$GF:
+@test_sample1-1-157/1
+TTGTGAACATGCTGCAATAACATAGGAGTGCAGATATCCCTTTGACATAT
++
+GDGGBEGGGGGGFGEGEBBEFG8FFGEGFD
+@test_sample1-1-163/1
+AAGTTTCCTTTCCAGTTGCTGAAGGAATTTATAGTGGCCCGTACCCTGTA
++
+=GGF3GGFBF7AGGA@G:GGGGGGGFFGGFCFGGFGDDGFF
+@test_sample1-1-165/1
+CATGTGACAGTTCTATTTTTAATTTGTACATGATGCAAGGGTTCCCTTTT
++
+;EGEGFGGDGGF?DG$G:>GBEGGEAGGFEGG'FFCFGBE-@@FFGGB=E
+@test_sample1-1-167/1
+TTTTGCTCAACAGTTAAGCATGGCTCCATGTCCCTTATATTTAGTCAGAA
++
+FGFBFGFGCGGB;8GF?DF+@GA?GGGFFDF@GGGG@GGGGFGF2GBFGA>AFA
+@test_sample1-1-173/1
+AAAAAAGTTAATACAACTCTGAGCACTCAGTACATAAAGTTTCCTTTCCA
++
+GCG7GGG;>GGDFGGFEGGEGGGG;?GGGFGCAGG
+@test_sample1-1-183/1
+TTTCTCATTTTTAAAAATTATTTTTTAATTGACAAAAATTATATATATTT
++
+FG@FECGDGEGCF
+@test_sample1-1-187/1
+CTTTTGTAAATAAAGTTTCACTGGAACCCAGTCACACTCATTTGTTTCTG
++
+FG'CFGGGFGGAGGGGGGBFFFGFG8FDEGDFGGGFGGFGBFFFFEGGGG
+@test_sample1-1-189/1
+ATTCCAAAATTTATCATTCAACACTGAAAATGTGTTGTATAAATGAATGA
++
+FGGGG>GDCGBEGEGGDFB1FG7C$GGCDF4FGFCE@EGG@ED''CFF4&
+@test_sample1-1-191/1
+GCAATAATGCTAAGAATCTTAAAGTGTTGAATTAGAAGTTAATACAACTG
++
+FGG8GGGFFGGFGGGG=9*3GGGEGDEFEGGGGGEF1DG@GF=?>GGGFG
+@test_sample1-1-193/1
+GATCTAGGATAAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAAGTAAAT
++
+FGGGFG,FFGGG;G45GCEGFGFGGEG3G==GGGF4FGE?FGFGEGBFGG
+@test_sample1-1-195/1
+CTTCCACACTGTGGAAGCTTTGTTCTTTCACTCTTCACAATAAATCTTGC
++
+GDGFGGFGGFGG5GCGGBFFGD98G4:GGGGEGDGFEDGFFGGF/7DFE8
+@test_sample1-1-197/1
+GACATTGTTAATAAAACGAAAAAAATTCAATTAGTAGGATTACATTCAGG
++
+GGGGGGGFFGGGCGGFGFGGF49GGEFGGFGGFGG;FGGGG6GFGFB(G4BD?
+@test_sample1-1-205/1
+AATAATAACATATCATGGATGAGCCTGTGAGGAAACAGACACTCATACTC
++
+GGGFG@GGFGGFFEGFGGC;F>3BGGGDGG>A@FGFGDGDFGDAG,.F8C
+@test_sample1-1-207/1
+AAAAGATAACCTCTTGAATGGGAGAAATATTTGCAAACTACTCATCCAAC
++
+GGGDFCGGGGGGGGG7FEFFGG>GFGGE/GGGGFFGFFGG>*DGG>177;
+@test_sample1-1-209/1
+CAATAGCAATGCTATATTTTAAAATTTACATTTCTTTGATAAAAAGTGAG
++
+6FFFEGGFF?GFGGFDGG5EEE>CFDGFBDGFGF8GFFEFFEF:E@8G:D
+@test_sample1-1-211/1
+ACACTGTATTCCATAAATATATATAATTTTTGTCAATTAAAAAATAATTT
++
+BGC3C;FGGGCGEEBFEGFGGGGDGGFE=G9GG;GDDBG>BGGFCAFDFF
+@test_sample1-1-213/1
+TGTTTATTTTTTATGTGGATTTCTTGTTTTGTCCATAGTCTTTATATGCT
++
+DGAGGGGGGFGDGEGFGGFGDGF;BG
+@test_sample1-1-215/1
+AAGTGTAGGATCCTTTTAAGTGTGGGATCTTGTTCAACTGCACAGGACAC
++
+GGGGGCGFGGFGFCAFGEF
+@test_sample1-1-223/1
+TGAATGGATAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACA
++
+GGGDGGC>GGGDGGFEGGFG=73F?GGFEGGGGFGCD-DFGFF?FF7D;?
+@test_sample1-1-225/1
+GAAGGGAAATGTAAGAAGAGAGAGATTGCCTTGACATTGCTAAACTTTGG
++
+CFGE9
+@test_sample1-1-227/1
+TTCCTTGATCTAGGATAAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAA
++
+GFGFGGGGFEGGGGEFGGGCFGGFAAGF8G?EGCFG-GEFFBGC=FAEED
+@test_sample1-1-229/1
+TCCAAGAATCCTACTTCTGGGTATATAGCCAAAGGAATTGAAATCAATAT
++
+ECGFGGGGGGGGGC>GE?FFFGC@CEGC4C9
+@test_sample1-1-233/1
+TTTTAATCTGAAAATGTCAGCTTTTTTGTTTCTCATTTTTAAAAATTATT
++
+GFGG:GG>GGGGGGFGGEGGA347GFG:%GDGGFEFGFGFGFAEEG4+E9
+@test_sample1-1-235/1
+CAAAATATTCATGCACTCATTTCTCTCATTCATGTGTTACTGTACAGGGT
++
+GFFGGGGE=E0G2CFGGA:31CDGGGFGG#>FGGGFGDFGGGFCFFD@>E
+@test_sample1-1-237/1
+ATGCTTATTTTAAATTTGGTTTAAATTATATTTAAAAACCATTTTCCCAC
++
+GFG?FGFGFFFGCDGGGGGFFGFGGGGGGGFFFG@GGFGGD=FFF='/5E
+@test_sample1-1-239/1
+TTAATCCATTAAAGTAAAAACAAATGTGGGAAAATGGTTTTTAAATATAA
++
+GGEGFFDFGDG,FGGEEAGDDFGFG>62>D;@=FGEG=?
+@test_sample1-1-243/1
+TGTCTGCAATAATGCTAAGAATCTTAAAGTGTTGAATTAGAAGTTAATAC
++
+GFGECFCGGGFFFGGAFCGG?:G:EDFFBF?DFFGGFE=DFF
+@test_sample1-1-247/1
+AGGATTCTTGGATCATGTGATAGTTCTATTTTTAATTTGTACATGATGCA
++
+EGFCGFGGGGGAG=EFGEGGFGGGB?EFEDGBGGGFGCECGC@DAGGG@A
+@test_sample1-1-249/1
+GGAATAGTTAACAGATGATAAAAGTGTTGTTTTCAGTCATCCCTATCCAA
++
+GFGF&GFFGEGGEFAGGFGC3FFF=AGGGFF99GGEFGADGB-'AGGCFF
+@test_sample1-1-251/1
+CAAATTCAGTTTACATTAGCCCTGCAATCATGTAGACATCCTGATTCCAG
++
+GGFG8GEDGAGFGFGGGG@GGGGGE5CGFFGGFFGGFGGFEG
+@test_sample1-1-253/1
+TTTGATTTCTGCTTTCTAGTAGAATCAGCTCCAAGACAAAAGTATATATA
++
+?@GGGE8E@G:G=FGGF;GGBFGE>FFFGFGG,)FGFBGBE5FG>GFGGGGEGF1GGFGGGFGGG;G?GD8C19FEEFG9<
+@test_sample1-1-257/1
+GATATCCAGAATATACAAGTAACACAAATATGTCAAAAGTAAAATAAATA
++
+@FGF:EE?GFF;FG:EG5DGGEG&*G
+@test_sample1-1-261/1
+AGTTGCTGAAGGAATTTATAGTGGCCCGTACCCTGTACAGTAACACATGA
++
+GE<
+@test_sample1-1-263/1
+TACTGACCTTATCCTAGATCAAGGAAGTTTGGCAGAGGAAGAGGGTATGG
++
+.FGFGGFFFEFGGGGGGGGGGF@AEGEFFDGFGGGFFCDGGFE4DGD9EG
+@test_sample1-1-265/1
+CTCCTATGTTATTGCAGCATGTTCACAATGGCCAAGATATAGAATCAACC
++
+GGFFFFGGFGGG+#GFGGFGGDFFC0EFGGGFEAGFFAGFGG2DFBF@F$
+@test_sample1-1-267/1
+ACTTCATTGGATAGGGATGACTGAAAACAACACTTTTATCATCTGTTAAC
++
+GECG@F@F;=GGC;GFFGGCGGGGFGGGGGD@GGEEGGGGFDFGEGFD=F
+@test_sample1-1-269/1
+TATGAAGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCCTCTT
++
+GCGFGB:6G?GGEFCFGFF>FGG4DGGFE:6@FFG?D2GGACFGGBF'2:
+@test_sample1-1-271/1
+ATATCACATTTCATTTATCCATTCATCTGTCTATGAACAGTTAGGTTGAT
++
+GBGGFGGGGGGGGGGEFG0>EGGFAFGG/DGGGGGACFFF
+@test_sample1-1-275/1
+AAATAGAACTATCACATGATCCAAGAATCCTACTTCTGGGTATATAGCCA
++
+GGGG@GGGGGFGG1GGFFACCGGGFFFGFFGGGGGFFGGCGG%F2@FFFD
+@test_sample1-1-277/1
+CTGTATTCCATAAATATATATAATTTTTGTCAATTAAAAAATAATTTTTA
++
+GGGEGF@CFGGFFEGEGGGG=-FFG-GGDA2GCGE26DGGGDFGE?5EAG
+@test_sample1-1-279/1
+GTCAATTAAAAAATAATTTTTAAAAATGAGAAACAAAAAAGCTGACATTT
++
+FGFGGFGGGGEAEDCGFGG@CGGGFEG59;06GGGEFC@E:<CDF:FDGGFGGEGGGFFGGD>#*.GDA
+@test_sample1-1-283/1
+GTTAATTAACACATATATTACCTCATATTAACATTTACAATATAGTGTAT
++
+GBB;E:G2GGEGGDGGGE9GFGGFGGFGGE9@4GGFFFAGEGFG=@GD9DE
+@test_sample1-1-291/1
+TTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTT
++
+DBGEBBEGGEGFGDGGFGGFGFBGGGGGGGGGG2GFGFFF=DGGF@BEG;
+@test_sample1-1-293/1
+TTTCACTCTTCACAATAAATCTTGCTGCTGCTCAAAAAAAAAAAAAAGTT
++
+G:GEGGFGGG6GDGGFGFDGGGDGDEGGF&FFEGBGGADG?8CG0?DE91
+@test_sample1-1-295/1
+TCTGTTGACAGTTTTTATGCTACAATAAGAGTTGAGTAGTTATGACAGAC
++
+>FGCGGGFGGEGGFFFGGGB1GGGFEF=BFGCDGFFGAGEGGFGGFGA?GFEGGGGFFGFGC;@
+@test_sample1-1-309/1
+TCTTGAATGGGAGAAATATTTGCAAACTACTCATCCAACCGGGGATTGAT
++
+4DGFFFGF?GGGGGGD9@
+@test_sample1-1-321/1
+AAAGAAATATAGCAGTTACAATTTAAAATTTTTAAAAAACGTCTTTTTGA
++
+GGG:GGGGGGGFBGGGG@GGBG7FEFFF;D>GGGF,EDFC?FFGGFF
+@test_sample1-1-327/1
+CTCTTCCTCTGCCAAACTTCCTTGATCTAGGATAAGGTCAGTAAACTTCT
++
+FG:FFGAGFGDGGG*DFGGFGFEFGGGGFGGGGGG5GFGEGFFGFBGCEE
+@test_sample1-1-329/1
+TCCAGGGTGTGCAGTAGTGACACTGGGAAACTGATCATTTTGATTTCTGC
++
+GGGFDC?>*FFGGGGEGGFEFFEEGDFFGGGFE?FGFGGGEFFGGGG7GGGGGCGGGFFG@GGFGF=GGGC&?F6GA
+@test_sample1-1-333/1
+TAGATATGGATGGTGCTTCTACCATACCCTCTTCCTCTGCCAAACTTCCT
++
+FFCGFGG@GG;E7@GCFFGEFGG57G?A9-FE7;GGGE=GEG?GFGFGG1DEGGD=GGFGGDGGFGEGF6DGFAG5DBECFDD
+@test_sample1-1-337/1
+TGATGTTGAGCATTTTTTGATGTATCTGTGGCCCTTTGTATGTTGTCTCC
++
+FG6F=FCF@FAFFFFFGDG8FGGGGFGGGC6?GFEFGDGF@7F@
+@test_sample1-1-339/1
+TTTCTTTTTGATAACAAGTTATATATCTTTATGGTGTGTAATGTGATGTT
++
+GFGGFGGGGGDDDGGFGEC2GG@CBDFGFGCFA4=GFGGGDGFGGEBFF2?FF@3GGFGDGGE&GGG..;GCFF
+@test_sample1-1-343/1
+CCTCATATTAACATTTACAATATAGTGTATTAGCAATTTTCAAGCATACA
++
+GGGGGDGF-GGFFGGDFFGGGGFEFGGEFEGGEFAGGGG:FGGF2@C3D=
+@test_sample1-1-345/1
+GCTCAACATCACTATTTGTCAGGGAAGTACTAATTAAAACCAAAATGAGA
++
+G9FGGFBG:GGFGGFGFADFFFFG>CGEF?;FD48AADAFEG<8GA
+@test_sample1-1-349/1
+CATGGATGAGCCTGTGAGGAAACAGACACTCATACTCTGCAAAGCAATGA
++
+GEGCGGEGG?DGGGF8F>FFAE8@DGFB@GAGGEGGFG?GGC9GGGGFGC
+@test_sample1-1-351/1
+TTTTCTTTTTTTCCTTACTTTCCATCACCAAGTAACTCTTCTGATATTTT
++
+GEGCGGFGGGGGGGGFEDGFGGG;GG0=CFFGGGGF;?GEGCF7EEEEGE
+@test_sample1-1-353/1
+TTAACATTTACAATATAGTGTATTAGCAATTTTCAAGCATACAATACATT
++
+EG80DGG'GGGGFGG#DFFG?>GGAFFFGGGB9>GGG=GDGFAGGGFGFF
+@test_sample1-1-355/1
+AAGAAGAGAGAGATTGCCTTGACATTGCTAAACTTTGGGAGCGTTTTTCT
++
+GEFGGFGFGGG)@FF)FGGGFBGGFGGGFCGGEFGF<1GEGE@;:9D'C(
+@test_sample1-1-357/1
+AAAGTGTTGAATGGGAAGAAATCAAGAATAGTTATACGAATATCACCATT
++
+GGGFGEGE6GGG.8.EEGF:EBBGFFDGEEFGF4EGG415FGGFG;GE1C
+@test_sample1-1-359/1
+AAGACTATGGACAAAACAAGAAATCCACATAAAAAATAAACATATGAAGA
++
+GGGGG4GGG9BDG:GFGFFGGG8GFF9FGGFGGGGGGGFFFFG;6EFEFF
+@test_sample1-1-361/1
+TTACATTTCTTTGATAAAAAGTGAGTTTGAATATTCTTCATATGTTTATT
++
+GG(GFBCE3ABF@GCCFED@FF4FGGGGG76AE5AF@<
+@test_sample1-1-365/1
+CCAGTGTCACTACTGCACACCCTGGAACAGAACAGGTAGGTCAGAAAAAC
++
+GEGDGFGG@GE>EG4CGGFGGFF2=GF@?3GG
+@test_sample1-1-371/1
+GCAATCATGTAGACATCCTGATTCCAGACAATGTGTCTGGAGGCAGGGTT
++
+G@GE7FGCGCFGE::9GFGFG4GGGC4GG>FFFFEFBEAEGGGGG=G?EF
+@test_sample1-1-373/1
+ACAACGTGGATGAACCTAGAGGACATTAAGCTAAGTGAAATAAGCTAGAC
++
+GDG1D-2FDCGGGGGGGGDGGD@FFGGGG:EGGFGFD?GGG=CFF?GGF@
+@test_sample1-1-375/1
+CCCCTGAATGTAATCCTACTAATTGAATTTTTTTCGTTTTATTAACAATG
++
+GFGF3FDDAEEGGGDEDF?F>E<;FGGBFFFGFGGEG2G>GCAFGGGFGF
+@test_sample1-1-377/1
+TATGGTAGAAGCACCATCCATATCTATCTTCATCATCTTCATATATAGTT
++
+GA9GAFEGGFGGFGGGEFEGGDFGCFFDDCDF+F=AFAEDGFGBE5GCFF
+@test_sample1-1-379/1
+CCCTCATTCATGGTGTGGGGGTCAGTGAGCTTAATGTGTGTCAAGATATT
++
+GGFGB;GDFGDFF:FGGE8EG4GFF?FGGG8GFFFA:FG<
+@test_sample1-1-385/1
+CATCTGTTAACTATTCCAAAATTTATCATTCAACACTGAAAATGTGTTGT
++
+GBG>5FBFGGEGGG:GGGF=GEFGGGF4>GGGGGG7GFGAFD5GFFFC3,
+@test_sample1-1-387/1
+TGTAGAGCCTATAATATTTACTTTTGGCCTTTTACGGAAGAAGTTTACTG
++
+CGGAGGDGG9GDGCGGD>GFCGG>GF>GG4EFFGGGGFAFFGFCFF+CGG
+@test_sample1-1-389/1
+GCATGGCTCCATGTCCCTTATATTTAGTCAGAACTCGGTATGTTTTAAGG
++
+DFGGGG+AFGGGGGDC5GGFCGBGEGGEGGFGCFFGFGGG7GG>G>>FG?
+@test_sample1-1-391/1
+AGATGAATGGATAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAA
++
+GCFFBGFGGFEBE-E@F:F-DE;?G
+@test_sample1-1-393/1
+TTTGGAATAGTTAACAGATGATAAAAGTGTTGTTTTCAGTCATCCCTATC
++
+GGGFFG,@GFFGGB.2DEFEF@GGGFGG?5GGAGG@GGCEFG'GDF1GFE
+@test_sample1-1-395/1
+CTCTGTTAATCATTGGGAAGAAAATGTATATATACTTTTGTCTTGGAGCT
++
+GF@DGGGGGG;G9GGDD9GFGFD,GFGG>G6CGGFGFFFGFEGGACGEFG
+@test_sample1-1-397/1
+AAGATTCTTAGCATTATTGCAGACAACATCAGCTTCACAAGTGTGTGTCC
++
+DFGGGDF@GFGGGGGGGFGD>FFAGFGG>F?GGGGGGF9GFFF
+@test_sample1-1-403/1
+CCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAACAGTTTTGAGCC
++
+FGGGFGGEEGDFGGEFG9GGGAGEEFGFFGGGC7F$>BFDGAFGGD2?GF
+@test_sample1-1-405/1
+AGGGTTCCCTTTTCTATCATCCTCTCTGATAGTTGCTATCTTTCATCTTT
++
+GFBGGFC54,GGBEGG?FG3GGGEEFFFGAGFAGGGDFG@GGFBFGE.C=
+@test_sample1-1-407/1
+TTATTTATGAAAACACAATAGCAATGCTATATTTTAAAATTTACATTTCT
++
+FEGGFEDGGE>GGDFFGGBF?GFGFF=DGEF4GFE4?FGFFFF8?GCFA?
+@test_sample1-1-409/1
+AGAGTGAAAGAACAAAGCTTCCACAGTGTGGAAGGGGACCCGAGCGGTTT
++
+2GFGGGG>F@GFFGGFCGG>GGGGGFGFGGGGGGDF
+@test_sample1-1-413/1
+GTAATTAATAATGTATTGTATGCTTGAAAATTGCTAATACACTATATTGT
++
+GGEGBGGFGGGGGGGAFFGG@CFE8FGG
+@test_sample1-1-417/1
+TCAGAAGAGTTACTTGGTGATGGAAAGTAAGGAAAAAAAGAAAAATGCAG
++
+DGGGGFF,DGFGCFGGFGGG'/FCD3G=FGGGGGDEC1D<;*?0G>4
+@test_sample1-1-419/1
+CTGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGT
++
+GGD:GA1EG@=DBF?BFGFGEFGEGGEFFGGGGFGEGFACADCFD>E*GE
+@test_sample1-1-421/1
+TCTAGGGCCTGTAGAGCCTATAATATTTACTTTTGGCCTTTTACGGAAGA
++
+GEFCGGEFE@GF;FGGFFG#'GFGGABGGGGCFGEF0GFAGFE/GEG>D@
+@test_sample1-1-423/1
+AAAGATAGCAACTATCAGAGAGGATGATAGAAAAGGGAACCCTTGCATCA
++
+DGGGGFGGGGFGGG$F>?=EDGGGEGG6FGFFEGFDEGGFFCDFFGGGD?
+@test_sample1-1-425/1
+TCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAAGTTGAGAAGGTAA
++
+FGGGEGGFGGGGFG1EGFF>GGCGC2FBFGGGGGCGEFGDGGDGDAF9GG
+@test_sample1-1-427/1
+TTTCCCAGTGTCACTACTGCACACCCTGGAACAGAACAGGTAGGTCAGAA
++
+GEGBGFEEFFFGGFGGGGGGFCGE-%AGCGFGGGDGFDGGCGGGCGD7FGGFGGFEBFGGFB
+@test_sample1-1-431/1
+AGCAAGATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTGGAA
++
+GG9GAGFGFGBGG@FG8GG;F5GDGGGGCEFGF=EBDECGGGCCGGG,=;G@EDDFGGGEEF7F?F:
+@test_sample1-1-439/1
+CCCGAGCGGTTTGCCCAGTTGTATTAACTTCTAATTCAACACTTTAAGAT
++
+GGGFGGGGFAGGGFGECGDGD?GGGFGFFBGDFAFGGEGGE7A67
+@test_sample1-1-441/1
+AGAGAATAGTATGATGGTTACTAGGGTTATCTGGCAGGGAGAGGATGAGG
++
+GG5GGFCEGGFE8FFGCG;GGGGF4E1DCFGGFGGB/FEFGG=FGDD:?@
+@test_sample1-1-443/1
+GTGGATTTCTTGTTTTGTCCATAGTCTTTATATGCTTATTTTAAATTTGG
++
+GGGEGDFGGBG?GE@EGGDCGEEFGGFGEGGGEFGGFGGGAFCGDGGGGDGGGFF?/0#DA
+@test_sample1-1-449/1
+TTCACAATGGCCAAGATATAGAATCAACCTAACTGTTCATAGACAGATGA
++
+GGFFFGCGFFGGGGDFGG>GDGGEGGG?GGGFGCED=DG9FGFFBG66><
+@test_sample1-1-451/1
+ATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATTAATAATG
++
+GEEGGFGGGGFEGGFF5AGDE?GGGEFBFGGGFGGGGB:GG7EGG>4>D8
+@test_sample1-1-453/1
+CCCAGTCACACTCATTTGTTTCTGTATTGTCTGTTGACAGTTTTTATGCT
++
+GGGGFDCGGD*>GGFGGB>DF
+@test_sample1-1-457/1
+CTTAGTTTCCCTCAACTCAGTTAATCTAAGTAAGCTCATGGTATCACTTT
++
+G>GG/FGGAECEFDGG=FGGGAGGDG?GFDGCD5GCGGGEFDA=C@FE@3
+@test_sample1-1-459/1
+GTAAATGTTAATATGAGGTAATATATGTGTTAATTAACTTGATTTATTCA
++
+DGDGFFFG=GGFGGGGGEGFGFG4>GEFGCGGEA>?@GGGGFFAFEFG?A
+@test_sample1-1-461/1
+GTATTGTCTGTTGACAGTTTTTATGCTACAATAAGAGTTGAGTAGTTATG
++
+GGGFFD+/GGEGGGFGFF@AC.EGF
+@test_sample1-1-471/1
+TGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTC
++
+=GGGGCGGG?A)@?G-;GF:>BG;-F
+@test_sample1-1-477/1
+TTACACGAAGACATACATTCATTCATTTATACAACACATTTTCAGTGTTG
++
+DEFG?GGGG8EFD
+@test_sample1-1-487/1
+TGTAATTAATAATGTATTGTATGCTTGAAAATTGCTAATACACTATATTG
++
+G,FEGEF=GBGFGGGFGGGGGGGGGGGFFEGG88EDGGGEDCCFGFF@DCFGGFEGE?DBFF>G;D>EEFA?EFEGFFGGG6FGGDGF:GGDDFFFGGGDDGG@GGGGAGGCFGGGGGFGGF>CEFBE/C9E2D>G
+@test_sample1-1-497/1
+CAAAAAGAAATATAGCAGTTACAATTTAAAATTTTTAAAAAACGTCTTTT
++
+GGCFG@FFGGEBGF4=DGEFGEGGGGGG?F@@FDGEAG?DGDBD9GF8FC
+@test_sample1-1-499/1
+TGTAAGAATAATGCTCTGAGGATTGAAATAATTGGAGATAGGAGGAAAAC
++
+G5GGGFDGGGFGEGEEGGAFGGGGACC-FGGFGFGFGGGDGEG=FGGFEF
+@test_sample1-1-501/1
+TACGGGCCACTATAAATTCCTTCAGCAACTGGAAAGGAAACTTTATGTAC
++
+GGD;GGEGGEGGFGF@>0FFCEFGE)FEF(GEB-C7FBEC61G@?+=CFG
+@test_sample1-1-503/1
+CAAAAGATTTGTCCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAA
++
+AGEGGGGFGFFG7FGGGGEG@:;CGGEGEGG=FG=FFGGFGEG7=9G?-<
+@test_sample1-1-505/1
+TCTTTCACTCTTCACAATAAATCTTGCTGCTGCTCAAAAAAAAAAAAAAG
++
+GE9AG?FGDG@EF?GGGGC;66?GGBBGBFFGGD<4GGAGG:C4;FDGDG
+@test_sample1-1-507/1
+CATTCATTTATACAACACATTTTCAGTGTTGAATGATAAATTTTGGAATA
++
+EFGEGB5FFE>E2>GC?5;GFGGG6GB3B9GGEFEEFFGGFCF'7FGDF4
+@test_sample1-1-509/1
+GATGCCCTCATTCATGGTGTGGGGGTCAGTGAGCTTAATGTGTGTCAAGA
++
+GGGGGDGG?GGGGGEGGGGEFFFFG>>GGEACGGGGFFDGBFGFAEG;:F
+@test_sample1-1-511/1
+CCACAGTGTGGAAGGGGACCCGAGCGGTTTGCCCAGTTGTATTAACTTCT
++
+DFFF::GGGFFFGDG<@FDGCEEGG,:GDE-D%
+@test_sample1-1-529/1
+AGTCACACTCATTTGTTTCTGTATTGTCTGTTGACAGTTTTTATGCTACA
++
+FFBGG
+@test_sample1-1-533/1
+ATGAGGAAATGGGACATTGTTAATAAAACGAAAAAAATTCAATTAGTAGG
++
+FGGGGCGG7GGG@GEGGGGGFGGGGGGA8=GFGGGGGFD(GEC@F>8GGD
+@test_sample1-1-535/1
+CTTGAGAAAATTAATATGACTCATAGATCTGGTTCCCAAGAGAAATCAAT
++
+BGGF9GGFDGGEGFDGG?GF;GGAG<@EGFF6GDFGCGEFGGGGEGF9D5
+@test_sample1-1-537/1
+GTAGCATAAAAACTGTCAACAGACAATACAGAAACAAATGAGTGTGACTG
++
+GGG2GGF/:FDGEFFFE@$FGEG;CFGFGC?GFFGGGGGGF;EDE@AFGC
+@test_sample1-1-539/1
+AAACGTCTTTTTGAGGTTCGTACCTCACTTAAGTCACACTGTTCAAAATA
++
+DFFGGDFGGFGGC06GFGGGF?0A@FEGGE;FCEGGE0C86?B@@GD?FE
+@test_sample1-1-541/1
+ATAACCCTAGTAACCATCATACTATTCTCTGCTTCTATGAGTTCAGATTT
++
+FGF:EFG@G?GEABG7GFGFFGG7
+@test_sample1-1-543/1
+TAGGATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATTAAT
++
+FEF4DGG3FEGGEGGGFGGFDFFGGGGEF0GGGGGFGEGCFFGEG:FGFF
+@test_sample1-1-545/1
+AGATGCCCTCATTCATGGTGTGGGGGTCAGTGAGCTTAATGTGTGTCAAG
++
+FF:GFGGEGGFGF=9(FGEEF;:GGGEF7GFFEGEA;F?FGDEDF:.BGG
+@test_sample1-1-547/1
+GATGTTTTGATACATATGTATATTTATCGTGAAACAGTGACCACGATCAA
++
+GGCF2GFGG>GGFFEGGD=GG"""GGFGGGEGGFDGDGGCGGFG,EGE8?
+@test_sample1-1-549/1
+AGCTAGACACAGAAAGACAAATATTGCATGATCTCACTTAGCATCTAAAA
++
+G=FGFFGGGGGCFDGE>GCGCGGGGD2FGF?7G:G>FGFGAA@GGGGG@;
+@test_sample1-1-551/1
+AGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGGCAGGGAGAGGATG
++
+FGGGGGEFCGDG7FDGGGD,GEGGGFGGB6BEDCEFFDFFAGGDCB>GGG
+@test_sample1-1-553/1
+CATGTGTTACTGTACAGGGTACGGGCCACTATAAATTCCTTCAGCAACTG
++
+BEGGAFGEG>GG1GFGG*EGEFAGG@EGGGFGDGGF9FGGEF@E=>EGGG
+@test_sample1-1-555/1
+AAAGGAATTGAAATCAATATGTCAAAGGGATATCTGCACTCCTATGTTAT
++
+EF?8:9@GFGG=>AGGDFFFFGGGA=EFAAFGFFGGGFGGGG-=FEFGGG
+@test_sample1-1-557/1
+ATGTTGTCTCCTGAGAAATGTCTATTCCTGTCCTCTGCCGATTTTTAAAT
++
+GGFFGDC:FG;FG)&GBFEGGDGGGGGDGFG;BBCA?G?GB;FF;9GG;G
+@test_sample1-1-559/1
+AAATAATTGGAGATAGGAGGAAAACAGAAAAGTGGAGGAGATAGAAGGTT
++
+AGG9FEEGGE@G5GFGEE#AFGGGGGBGFG=EGFGA@%FFGG9'?@FGGD
+@test_sample1-1-561/1
+GTTCCCTTTTCTATCATCCTCTCTGATAGTTGCTATCTTTCATCTTTTTG
++
+GGGGGAGFAEGGGGGGDFGGEGGGF>GFGFGGGGGFF@FFE5FFGG/F24
+@test_sample1-1-563/1
+TAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAGGCAA
++
+GFGFFEGGGGGCGBGFFGE5GGDGG0BA7FGGGGGGGFCFEFEE;A=EGC
+@test_sample1-1-565/1
+GCTATATTTTAAAATTTACATTTCTTTGATAAAAAGTGAGTTTGAATATT
++
+GGGGBGG?GGGBGFGGGGEAGFCG9CGGDDGGBGGGACGB;6D&'DFGGE
+@test_sample1-1-567/1
+TAGTAGGATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATT
++
+G:FG7G;A.GGG<7GFGFAFEGGGGG?FGFGFGGGGGFGGGFGDE6GFG6
+@test_sample1-1-569/1
+AACTGGGCAAACCGCTCGGGTCCCCTTCCACACTGTGGAAGCTTTGTTCT
++
+AFAEGDGDGGGG;GGGFGEFAGG2<
+@test_sample1-1-573/1
+GAAGAGGGTATGGTAGAAGCACCATCCATATCTATCTTCATCATCTTCAT
++
+GFFGE=GFGAGGGGGGFGEF@GG?G.GD>FF;DFGGGFFAGGGF;8GFGD9GGABGED8B4GGFEGAFGB4FG
+@test_sample1-1-581/1
+CAAATTAAAAATAGAACTGTCACATGATCCAAGAATCCTACTTCTGGGTA
++
+GCGGGGGFGCGGGGFFGGG&FBF7G7GGGGCGFEG>GBECEDA9EFAAGF
+@test_sample1-1-583/1
+GGTATCACTTTCCTGTCCTAGAGGGAACATATTTCCTGCATTTTTCTTTT
++
+CGGDGBGGDFCGGGGGEGGGB0G-EA'EBEGFGFFEEFGGGFF@B@FBFA
+@test_sample1-1-585/1
+TTAAATTATATTTAAAAACCATTTTCCCACATTTGTTTTTACTTTAATGG
++
+AGGFGGG>GG5GGGAFFGGGG@FFEA$GCGFFGEGGE427>CE60?G<7D
+@test_sample1-1-587/1
+CCTGTTAGAATGGCTATTATCAAAAAGATGAAAGATAGCAACTATCAGAG
++
+GGGGGGGGGFGDG6GGGGGGFBGA2FGGFG9AF?GGD@:GG@9GFF<>FD
+@test_sample1-1-589/1
+AGGGTGTGCAGTAGTGACACTGGGAAACTGATCATTTTGATTTCTGCTTT
++
+BGG3?GEFGGGGGGG;F>EGFE0EGGGG<=BAGGGGGGG@AGGFFGEB7.
+@test_sample1-1-591/1
+ATCATCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAAGTTGAGAAG
++
+GG=G.FEFFCGG>GGGGD?G9@G@BDBA8EFE4>CAGF@GF=FGFE>@FF
+@test_sample1-1-593/1
+TTTATTTATTTTACTTTTGACATATTTGTGTTACTTGTATATTCTGGATA
++
+CGGGGGFGGGGFGFBEFDGGBFFGD%+FFGGFGGGGGB
+@test_sample1-1-597/1
+ACATACATTCATTCATTTATACAACACATTTTCAGTGTTGAATGATAAAT
++
+DFF@72BCGGGE;GFBGDGGGGGGGGFGGEGC>8FDG;FFE9GGGGGA@D
+@test_sample1-1-599/1
+ATCCATATCTATCTTCATCATCTTCATATATAGTTGGGAGTAAAGATGCA
++
+FGGF7.GG=F>F<;EGEG>@GFGGDG6FBEGGFGFCE@GGGG$=BAGBCG
+@test_sample1-1-601/1
+AAGTGGAGGAGATAGAAGGTTGAAGGGAAATGTAAGAAGAGAGAGATTGC
++
+GGFGEGGGD?GG<0BFB'@DDGGFGFGBGGGFB9?G9DDFCGGG4B'F>G
+@test_sample1-1-603/1
+CAATTAAAAAATAATTTTTAAAAATGAGAAACAAAAAAGCTGACATTTTC
++
+GDFGEGGGAGGFFGGG=E>FGGGE
+@test_sample1-1-605/1
+AACTGAGTTGAGGGAAACTAAGACGTCATCCACCATTGTTGAAGATGCCC
++
+GFGGFEGFEGCGGGCGGC857FGGGG'CGG>G0F7FGGGFE>-FGGEGDCFGGG?GD3FGEDCGFGF?EGE+
+@test_sample1-1-609/1
+TTCATAGACAGATGAATGGATAAATGAAATGTGATATGGAAAATTATTCA
++
+?<93CGGGF:41EGFGGEFGGGFGGAEG?FGGGGB@GGEFDGE8FGDG;>
+@test_sample1-1-611/1
+AACATACACATATATTAAAACATCACACTGTATTCCATAAATATATATAA
++
+A@>FGGGAGDFG>CEGGFAGGGCGGGGGGGFGB6ECGEGABF1FAFG"CA
+@test_sample1-1-613/1
+GAAACAAGGAAGAGAGCTTTAATGCAATGGTGATATTCGTATAACTATTC
++
+F=FE@G@DGGBE6DEGGBGFFEFGGD?BGGFGA8F:9FGGFG8>FGFF?;
+@test_sample1-1-615/1
+CAGAATTGCCTACTGTTTTTAAGGCTGAATAATTTTCCATATCACATTTC
++
+GGGGGGFGFFGGGGEFFGGGE:'FDGGGGGGGG=FBAGFG.EGGGGCGFGFFEF>FGFGCE'F@
+@test_sample1-1-619/1
+TAATTTGTACATGATGCAAGGGTTCCCTTTTCTATCATCCTCTCTGATAG
++
+GDGGEFFG@GG@E?GG>@GGFGC8E9GGF8C>GFEB?9EGGFA
+@test_sample1-1-621/1
+TCTCCCTGCCAGATAACCCTAGTAACCATCATACTATTCTCTGCTTCTAT
++
+FEDDAGGGGFGEGGGGFFFGFF=EBFEGGGFFBFGEDGGGFGGC%AC:EG
+@test_sample1-1-623/1
+CACAGGCTCATCCATGATATGTTATTATTTATGAAAACACAATAGCAATG
++
+GGG:G>FGFGGFCGDFG@FGFAGGGGFDFGAF5D>GGEFGGGB:GGDE9C
+@test_sample1-1-625/1
+GTAATATATGTGTTAATTAACTTGATTTATTCATTCAACAACATACACAT
++
+A7=GGEFGGG>GGEDGGGGFGBGFEFGFEFFGG1CFE
+@test_sample1-1-631/1
+TGTGTTAATTAACTTGATTTATTCATTCAACAACATACACATATATTAAA
++
+GEFCGGGGBGGDGE:9G0GDDGCBDCG8GGG@E<99GEFCGGGFGFFB>E
+@test_sample1-1-633/1
+TTGAAATTCTTAACAATTTTTTTAACCAAAGTCCTCACAAATTCAGTTTA
++
+EGDGGFGGGGGDGFDGDGGFGBGECFGEGF@GFGFGF
+@test_sample1-1-641/1
+TGTGAACATGCTGCAATAACATAGGAGTGCAGATATCCCTTTGACATATT
++
+BGGGDF.GC8GG@:GFC1GGAFGGGF&GFFDFGG=GF9*.GG@GFGGGG=
+@test_sample1-1-643/1
+ACATTTCATTTATCCATTCATCTGTCTATGAACAGTTAGGTTGATTCTAT
++
+GEGGF;AFFDGEGGGFGGGFGGFFFFBFE@GG>CE;FGGGD<-
+@test_sample1-1-647/1
+AAATATCAGAAGAGTTACTTGGTGATGGAAAGTAAGGAAAAAAAGAAAAA
++
+GGGDCGGFGGGFEGGGFB;FBF?FFGGDGDGGG8/.CCFFFF%';.GGCF
+@test_sample1-1-649/1
+CAGAGAGGATGATAGAAAAGGGAACCCTTGCATCATGTACAAATTAAAAA
++
+@GFGGFGGG8FFGEFD?FCGBGGGC5DGGGGGGGCFGGEED.6FG=E;FD
+@test_sample1-1-651/1
+AGGAAACAGACACTCATACTCTGCAAAGCAATGACTAAGATAATTATGTC
++
+GGGCGGG@GGGEFGG5BFGGGEGGFFEFGEE'3EGGF
+@test_sample1-1-653/1
+TACCCTCTTCCTCTGCCAAACTTCCTTGATCTAGGATAAGGTCAGTAAAC
++
+GFG?DGGGGFD"GG@GEG>3GFGGFGGFFD@GGDGF?GGFG58F6F%@GG
+@test_sample1-1-655/1
+GAGCCATGCTTAACTGTTGAGCAAAATAAAAGGGCTCAAAACTGTTTCTC
++
+FGG?89FGFGEGE9ABFEGG&ACECC-C5G?EFGGDD>F>EGD%FGGDFBBFFFDGGCCGGDG2=B@GGGEGBFDD
+@test_sample1-1-659/1
+AACTGTCAACAGACAATACAGAAACAAATGAGTGTGACTGGGTTCCAGTG
++
+GCEGGFCGGGCGDGFGGFG@?FGC?DGGDDGFF:+FGEDGE4>BDD7AAD
+@test_sample1-1-661/1
+GGATGAGGAAATGGGACATTGTTAATAAAACGAAAAAAATTCAATTAGTA
++
+GGEGFFGFFGGECGFGGG;GDBFGFFGGGGGGFEF6GGF>BE@=%FDBFE
+@test_sample1-1-663/1
+GCAATTCTGTCATTTGAGACAACGTGGATGAACCTAGAGGACATTAAGCT
++
+GGGGGGAGGEG=GGF-($GGGDGFAGG6GGGGGGDGGGC9BG5D6D><-(
+@test_sample1-1-665/1
+CAAAAAAAAAAAAAAGTTAATACAACTCTGAGCACTCAGTACATAAAGTT
++
+GDFE;EGFGGGG/BGGGGGBGGFGGGGFGGF&5G@CFGG;;'&
+@test_sample1-1-667/1
+TTGCCTTGACATTGCTAAACTTTGGGAGCGTTTTTCTGACCTACCTGTTC
++
+GGFEGECC@BGGFD*FEG;GGGEGBBFGCGDGGGG7:E0FF?)GGFBC38
+@test_sample1-1-669/1
+AATGATCAGTTTCCCAGTGTCACTACTGCACACCCTGGAACAGAACAGGT
++
+EGG5GGGEGGFABGA4/GGGGGAAFFGFDFEFFGEGBFFG*GFFGGFG8FGGGGG@7F8;DGG
+@test_sample1-1-675/1
+ATAGATATGGATGGTGCTTCTACCATACCCTCTTCCTCTGCCAAACTTCC
++
+GG7EG?C=FGGGGDDFEBGGFGGGFFGGGGFFGGEGGGGGC?FG@EGAGF
+@test_sample1-1-677/1
+TACTAATTAAAACCAAAATGAGATGTCCCCTCAAACCTGTTAGAATGGCT
++
+GE@DDCG#.GGGF1AGFGF>=FDFGFEGGC:EFGGEGGFGF8?GEAFFEF
+@test_sample1-1-679/1
+ACTCTTCTGATATTTTTTCTCTTGAGAAAATTAATATGACTCATAGATCT
++
+GCGCFGCGGGGGGDGGDFFFG?GGGEGFGGGGEDCG1=A1=BGF@)FFGF
+@test_sample1-1-681/1
+TTCAGATTTTTTAGATGCTAAGTGAGATCATGCAATATTTGTCTTTCTGT
++
+GC7GGFCFGEGGFGG2GDFFGEGFFGECEG=>DCGGF'G74GGEGCFGGE
+@test_sample1-1-683/1
+AGTTGCTGAAGGAATTTATAGTGGCCCGTACCCTGTACAGTAACACATGA
++
+FG@FGEFDGFGG9GGG;GGG*GGDGGFFEGGGG7GGEFEA9GFEFEFBEE
+@test_sample1-1-685/1
+TTAAATATAATTTAAACCAAATTTAAAATAAGCATATAAAGACTATGGAC
++
+AFGGGGDDBGE5D6:G@GGEG@FCCGEECFGGGF@<GFGG
+@test_sample1-1-687/1
+ATAAATGAATGAATGTATGTCTTCGTGTAACCATTCTTTCCTTAAAACAT
++
+8GFFG=5GF@GAFAFEGBDFGFG;GGGEAEFGGGGGGFFC?G@BGE=EFF
+@test_sample1-1-689/1
+TGGTTTTTAAATATAATTTAAACCAAATTTAAAATAAGCATATAAAGACT
++
+=GFGFGGDGGGGDDGFEGGGGGFGGE&GFGEFD1-FEGGA6GGG7):BBA
+@test_sample1-1-691/1
+GTATGTCTTCGTGTAACCATTCTTTCCTTAAAACATACCGAGTTCTGACT
++
+GFFGGGCGFFBFBG;FGG%?FGGGFEFAF?FGEBEGFDFD
+@test_sample1-1-693/1
+AGTTACAATTTAAAATTTTTAAAAAACGTCTTTTTGAGGTTCGTACCTCA
++
+GCGFGGG'BFGGFGDGFFF/FGDFEFFGFBDCGEFGGGGGF?9FFGFGGB
+@test_sample1-1-695/1
+GCCCAGTTGTATTAACTTCTAATTCAACACTTTAAGATTCTTAGCATTAT
++
+GEGEFGGGFGGFGGGGFADFGG7FFEFEGGF58GGFGG&B'G56ED%BG'
+@test_sample1-1-697/1
+CAAATGTGGGAAAATGGTTTTTAAATATAATTTAAACCAAATTTAAAATA
++
+GEGGG@BEFGGFGGGGFG=G4G@GD;CFFGFGGEGGFBFGGG9&EFCGFG
+@test_sample1-1-699/1
+CAGTTTACATTAGCCCTGCAATCATGTAGACATCCTGATTCCAGACAATG
++
+>EDFGBGGFGD7>GFGEGFGGFAG@FGCBFGF?FGGEGFGFEGFGFGGFC
+@test_sample1-1-701/1
+TAACTCTTCTGATATTTTTTCTCTTGAGAAAATTAATATGACTCATAGAT
++
+GGGDGGG;BGGD@FFGD@E@EGGCGFEGGFGFGGFDG@GGEEFFGG6FF@DFGFFG*5GF
+@test_sample1-1-711/1
+AATTATTTTTTAATTGACAAAAATTATATATATTTATGGAATACAGTGTG
++
+G0GFGGEGFGFG@BGGE?GGGGGGF?D?;FGGEFG>3BFGCCGGGG<>CG
+@test_sample1-1-713/1
+CTTTTTTTTTTACTTCATTGGATAGGGATGACTGAAAACAACACTTTTAT
++
+>GGEGFGGFD?FGGGGGG6FGGFG4DCFGGG9DEFGDEF>>E::DGGGE7
+@test_sample1-1-715/1
+CCCTTTTCTATCATCCTCTCTGATAGTTGCTATCTTTCATCTTTTTGATA
++
+CCCG??G@FCCEDGDE@GGGFFGGGGG:=GFD0EGGB=GGFG;:/B<9BE
+@test_sample1-1-717/1
+TCAGGGAAGTACTAATTAAAACCAAAATGAGATGTCCCCTCAAACCTGTT
++
+EGD?DGD@GFGGGGGFGGGF3FCGGF?EEGGFGGC4D8G;2GGGCFDD9%
+@test_sample1-1-719/1
+TAAATAAATAAATAAATAAATAAATAAATTAAATAAATTATTTAAAAATC
++
+FEFFGGGAGGGBG4GGGD4BGGGGEGGGCGG9EGFGGFBFFEGF3B>:D
+@test_sample1-1-723/1
+GTGTCTGGAGGCAGGGTTTACAGGACTTCAAGAACCTTACCTTCTCAACT
++
+GFGG5FFGFGGGGGGGBGCEF?G/9FFGFFFG@FG:.7DA8GE%G@D:7<
+@test_sample1-1-725/1
+AAGGTCAGTAAACTTCTTCCGTAAAAGGCCAAAAGTAAATATTATAGGCT
++
+GGG8?GGGF)FE@:FCEE
+@test_sample1-1-727/1
+TATGTTATTATTTATGAAAACACAATAGCAATGCTATATTTTAAAATTTA
++
+GF@GGDFFGGGGGCFGGFBG:GGGFFGFGGBBGD;9D@GGGGGG?GDGGF
+@test_sample1-1-729/1
+TGTATCTGTGGCCCTTTGTATGTTGTCTCCTGAGAAATGTCTATTCCTGT
++
+FFEGGGBG@FGGDGEFEBGG=GGGGGFEFGFGFFGG;GGBGGGGGC8GGF
+@test_sample1-1-731/1
+ATTAGTACCACTAGATGTTAGTGACCCATTGATGCTTCTTAGATCCTTGT
++
+F:BCFGGFGGGGEGDCFGGGGG8GEFFFGGGGF?GGG>FGCCG?EDE2GE
+@test_sample1-1-733/1
+TTCCCACATTTGTTTTTACTTTAATGGATTAATTCTTCTGTATAATTTTT
++
+GGGF9GFGG=EGGGGGGGGGGG?<2FGFGGEF,262FFGGD=B4C%DG76
+@test_sample1-1-735/1
+AAGTGTGTGTCCTGTGCAGTTGAACAAGATCCCACACTTAAAAGGATCCT
++
+GGB>E:D:FGGF'ADGFD@G?GDGGGGGFEGGEAGF4GGC6DCGGGG%FF
+@test_sample1-1-737/1
+CAATAGCAATGCTATATTTTAAAATTTACATTTCTTTGATAAAAAGTGAG
++
+FFFDGGGGB>>GGGFGGGGGAGGF6GFGEEFGFFGDAG<4FGFG:CFD?D
+@test_sample1-1-739/1
+ATTAGAAGTTAATACAACTGGGCAAACCGCTCGGGTCCCCTTCCACACTG
++
+GGGGFCGG9C=GFGGGGGGGGFGEGG@GGGGAGFGFEFFCGGEC>0>FBF
+@test_sample1-1-741/1
+CATTATTGCAGACAACATCAGCTTCACAAGTGTGTGTCCTGTGCAGTTGA
++
+GGGGGGEGF@GCGG=FGGBFBGGGFGFGF?GG=GF195:F@?FFG:DFGGGGFEGGGG?GCDFGE>3EF;:GEE
+@test_sample1-1-745/1
+AATGGGTCACTAACATCTAGTGGTACTAATTAACTCTGTTAATCATTGGG
++
+GGFB;BBFA8
+@test_sample1-1-747/1
+TGCAGTTGAACAAGATCCCACACTTAAAAGGATCCTACACTTTTTTTAAT
++
+GFBG9FFFFGGG9FAGFFFFEFFGGEGGG:GGGE-F7EGG4,89-18GFFA%DF>=
+@test_sample1-1-767/1
+CAACTCTTATTGTAGCATAAAAACTGTCAACAGACAATACAGAAACAAAT
++
+G?DGFEFGGFGGCD?FFGG>GGGG;GGFFDGFGDFG.CGBCFFGCGD=6F
+@test_sample1-1-769/1
+GCACTCCTATGTTATTGCAGCATGTTCACAATGGCCAAGATATAGAATCA
++
+GGFDGGF.*:GGCG9GGGFGFDFFFE/=(DCFGEFF/E91FA
+@test_sample1-1-771/1
+AAGCTAATTAACGTAATTCATGATCTGACATAATTATCTTAGTCATTGCT
++
+GBFGGFFFGF6>GD9GG?GGGGDGFEGBEGGGBFGEEEGGFAFGFFG0F@
+@test_sample1-1-773/1
+ATTTTTTCTCTTGAGAAAATTAATATGACTCATAGATCTGGTTCCCAAGA
++
+GGFFG64GGDGGG;4GG=4G=?CGFGFFGDFGF=BFG?F5FFBA7GB=+D
+@test_sample1-1-775/1
+AACTTTCATCTGCATCTTTACTCCCAACTATATATGAAGATGATGAAGAT
++
+F$FGGGGGFCGEFGG'FGGGGEGEGGFGEFCGCGFFGFGGF>GGEC?;2G
+@test_sample1-1-777/1
+TGGGAAGAAAATGTATATATACTTTTGTCTTGGAGCTGATTCTACTAGAA
++
+GEGGGFG@DFGBGGFGGFGGGGGGGFEGFCGGDCEEGFGGEGGFD9GAFE
+@test_sample1-1-779/1
+TCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTTCCC
++
+GGGG@=CEGBF27F>GGGFGGGGFGEDFG9GFGGEABF58>GFGGD;3;D
+@test_sample1-1-781/1
+TCTAATTCAACACTTTAAGATTCTTAGCATTATTGCAGACAACATCAGCT
++
+E?GFFGGGGGGGGGGGGGGGGGGFGGEG;E>GBBDGGGF?BGGGGGDAGF
+@test_sample1-1-783/1
+TGATACATATGTATATTTATCGTGAAACAGTGACCACGATCAAGCTAATT
++
+F2BDG>GEFGFD:EEGFFEFGGGAGBAGGFGDEGFGCBGEGFG?:1DG?8
+@test_sample1-1-785/1
+TGACATAATTATCTTAGTCATTGCTTTGCAGAGTATGAGTGTCTGTTTCC
++
+GGGGG4GGF@2GFGGGGGGFB?G>FEGGF=GGF@FDGG
+@test_sample1-1-795/1
+GAAGTTAATACAACTGGGCAAACCGCTCGGGTCCCCTTCCACACTGTGGA
++
+GGEGGGGDE3E6G1FGFGGGGGEGCFCFCAGEGG6GGGFGF8G(F7:@>E
+@test_sample1-1-797/1
+TTGTGAAGCTGATGTTGTCTGCAATAATGCTAAGAATCTTAAAGTGTTGA
++
+&GGGFGEGGGGFGEDGGFAGG>GDFCFFGDGGFGEG6@GGD9G?EFGEC=
+@test_sample1-1-799/1
+AATAAATCAAGTTAATTAACACATATATTACCTCATATTAACATTTACAA
++
+GGFGBGGGGF=GGGFEGGCFFFDFGGFGGG7E9FFEEGFFGG>;EC=FBDGGG
+@test_sample1-1-803/1
+TCCAGTTGCTGAAGGAATTTATAGTGGCCCGTACCCTGTACAGTAACACA
++
+EGGGFFGGFEG=>GFCGGGGGG5GFFGGGGDFCEGEBGF8GFGFG1AGGGFCF>GG>GG=GGFDAF?4:GFGGGGGFGGGFFF4%>FFAFGF
+@test_sample1-1-809/1
+TATTCGTATAACTATTCTTGATTTCTTCCCATTCAACACTTTTTTTTTTA
++
+D?GGGDDAGG@DDFG
+@test_sample1-1-811/1
+CAACAATGGTGGATGACGTCTTAGTTTCCCTCAACTCAGTTAATCTAAGT
++
+'-GFF@GFEGEFGFCGGFD?:CGGGG@BGE<55GGGEFFE?G@95GFGFF
+@test_sample1-1-813/1
+CATGTCCCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGG
++
+GGGGEBFGCGGGGBGFGFGGGGFGFFFFDGGFGGGCGGFFE@G-DGGGAC
+@test_sample1-1-815/1
+TCAGTTTACATTAGCCCTGCAATCATGTAGACATCCTGATTCCAGACAAT
++
+@9?GGFFA;FFGGEE?FFFG?GGGGG7G>GGGGFGGG9@@;F49GF:E<0
+@test_sample1-1-817/1
+TGTCTGGAGGCAGGGTTTACAGGACTTCAAGAACCTTACCTTCTCAACTT
++
+GFGEAFFBFGGGDFCGEAGE@GEF@BGG4GFFGGG=E:FGGFFDFG:?F7
+@test_sample1-1-819/1
+CAGGAATAGACATTTCTCAGGAGACAACATACAAAGGGCCACAGATACAT
++
+GCGE?GGGGGGGGFFGFGG:FGGFFF7FEFGGGFGGGF@GAGGF?GGFEGGGFEGFFFFFGEGFGFFG
+@test_sample1-1-825/1
+ATAGAATCAACCTAACTGTTCATAGACAGATGAATGGATAAATGAAATGT
++
+GGGFGGGFFFGGGGA6BGFEGEGGFGAF1AGAGD0G>DF@FG2@BAFED=
+@test_sample1-1-827/1
+CAGAGGAAGAGGGTATGGTAGAAGCACCATCCATATCTATCTTCATCATC
++
+GAFGGGGGGGF1DDG@GFF9FFGGF@GFGFF>AEGGGDCFGGCF,C=BCF
+@test_sample1-1-829/1
+ACCACGATCAAGCTAATTAACGTAATTCATGATCTGACATAATTATCTTA
++
+GBFGGGGADGGGFGFGGG;F%GGGGGECG7G@CGG7GF?DEGFGEGEFGGGGFGFFGGFGGBGG7>GGG@FFG::GGBGGF?G4
+@test_sample1-1-833/1
+ATAAATATACATATGTATCAAAACATCACATTACACACCATAAAGATATA
++
+G7FGGGFE>6E?GGGD??$,
+@test_sample1-1-835/1
+GTAAAAAAAAAAGTGTTGAATGGGAAGAAATCAAGAATAGTTATACGAAT
++
+GGCG:@GF:GG13GEFBGED=3DGF&FAG
+@test_sample1-1-839/1
+AATAATTTTCCATATCACATTTCATTTATCCATTCATCTGTCTATGAACA
++
+?GGF6FGFGGF@FGGGGAEGGFFGFG8#GEG?FGGDC:FGFFEFGDFADF
+@test_sample1-1-841/1
+CTTCTATGAGTTCAGATTTTTTAGATGCTAAGTGAGATCATGCAATATTT
++
+G9?G@FGGEDGDEG>GFGFGGFFFFB=G=FF@?
+@test_sample1-1-845/1
+AGATATAGAATCAACCTAACTGTTCATAGACAGATGAATGGATAAATGAA
++
+F7GGGGFGCFGGEGCE8EDGFFDFF<'@FE3GE2DD5FG>F9C<0
+@test_sample1-1-847/1
+AACAGTAGGCAATTCTGTCATTTGAGACAACGTGGATGAACCTAGAGGAC
++
+0G=FGGBFGGGCFGFFFGEGE=GFDC:GFGFEGCG?EGGEGEAECCFDFE
+@test_sample1-1-849/1
+CACAAATATGTCAAAAGTAAAATAAATAAATAAATAAATAAATAAATAAA
++
+GGDEGG8FGFD:GFGGGFGGFGG=EG>F@FFGDGGE?FGGGCD7GGGE6D
+@test_sample1-1-851/1
+GACCCGAGCGGTTTGCCCAGTTGTATTAACTTCTAATTCAACACTTTAAG
++
+GG6GGFGGEGGGGGGFFGEGGFBDGFGAGFGG/GEFG;@EFCGCFF
+@test_sample1-1-853/1
+TCTTATTGTAGCATAAAAACTGTCAACAGACAATACAGAAACAAATGAGT
++
+DG=GFEGFFGFFGGAGEBGGEGGFGFC(GGGE?EGGFF>EG@*<5DGDCB
+@test_sample1-1-855/1
+ATTCTTGGATCATGTGACAGTTCTATTTTTAATTTGTACATGATGCAAGG
++
+GGGGECGGGGGG?BGGFCGGGEEADEEGFGGGG@B<>EGGDEDF4F6GFB
+@test_sample1-1-857/1
+GATTACATTCAGGGGACCCAATATACGACATGTTGACTGTAATTAATAAT
++
+GBGEFGFGFGGDB9F.GGGGFGG4FGFFCFFGDFG(3E;AD0FF@GGE:
+@test_sample1-1-861/1
+TATTAACTTTTTTTTTTTTTTGAGCAGCAGCAAGATTTATTGTGAAGAGT
++
+FFEEFG6FG@GGFGCGGGGGGG@GGEFGGGGGGGGBFFCEEA7'FF@GF$
+@test_sample1-1-863/1
+GAAGAATATTCAAACTCACTTTTTATCAAAGAAATGTAAATTTTAAAATA
++
+GCG)GGGGFCFCGGGG;GGGCGGGFCGGGGGDGFGDGGG=6+AGCEAFCF
+@test_sample1-1-865/1
+AGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTCAAGGCAATCTC
++
+GGGGGBGDGGEGGFFFFFGFFDGGGFGGGGGEGCFGEFGGFGG*EADE34
+@test_sample1-1-867/1
+ATTGATTTCTCTTGGGAACCAGATCTATGAGTCATATTAATTTTCTCAAG
++
+FCFEGECGGG>6GGGGFGEGGD6GEF@E@;;FGGFC>FE@FEG@7D=/C
+@test_sample1-1-875/1
+GTAAACCCTGCCTCCAGACACATTGTCTGGAATCAGGATGTCTACATGAT
++
+FEGF?5GEGGA=BGG6<8FGCGGFGEFGG.CFGFGGGGGG<=EGFGGCDG
+@test_sample1-1-877/1
+AATAAGAGTTGAGTAGTTATGACAGACACTCTAGGGCCTGTAGAGCCTAT
++
+G1EGF=GGCDGFEGF)GFGGGFFFGDBGFGGGEE>GG2GGGGGGDFGBGF
+@test_sample1-1-879/1
+AAAAGGCCAAAAGTAAATATTATAGGCTCTACAGGCCCTAGAGTGTCTGT
++
+FGDEGGGGD@GEFEBGDCDCFG'F$5CD7GGG4EADGG9FEEGCGBGGGF
+@test_sample1-1-881/1
+GAAATATGTTCCCTCTAGGACAGGAAAGTGATACCATGAGCTTACTTAGA
++
+GF8B@GGCBADGGGAGG@G.FE7GGGGGGGBC)DFCEDFBFGGGE1F
+@test_sample1-1-883/1
+AGTTTGAATATTCTTCATATGTTTATTTTTTATGTGGATTTCTTGTTTTG
++
+E@F4GAGF:9GG=EGGGG@B5BA*FGEEGGE4FGDG6GGGGGGCDGFGFGD49GGBGGGE&DGGGD
+@test_sample1-1-887/1
+TTGAGGGGACATCTCATTTTGGTTTTAATTAGTACTTCCCTGACAATTAG
++
+DGGFGGEFGG?FGFGGGGGAG>FGGGD.4GGFBGGDEGGG>FGFDEG>GGEA982;:GGFGF7
+@test_sample1-1-893/1
+AACTGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTTGTATTAACTTT
++
+FGAEG1EGEA5FGGCEFC*GGGCGGGGGGGG+FGG?GGF4GFG5FDG2DG
+@test_sample1-1-895/1
+TTAGGTTGATTCTATATCTTGGCCATTGTGAACATGCTGCAATAACATAG
++
+1GGG?GFDFF=G>GFGAG>EEGGG=GGGAGGEDCDEGFGGFG@GGGGGFC
+@test_sample1-1-897/1
+ATAACTACTCAACTCTTATTGTAGCATAAAAACTGTCAACAGACAATACA
++
+GGGGECG&FFBFGGGEGGFGFGGFACGGEGGD4:AFFGCFGGGBFCG7DF
+@test_sample1-1-899/1
+TTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGGTTACACGAAGACAT
++
+GFGFEG>EGG8GFCGGGGACG,ADG*8G;GFGGDGFFGGGGBGFF>GECF
+@test_sample1-1-901/1
+CTAATACACTATATTGTAAATGTTAATATGAGGTAATATATGTGTTAATT
++
+GFGF?DFF*GGGFFGEDBGGGGF1AA<>FF?BGEF-B0FGEEE3=BFD<@
+@test_sample1-1-903/1
+TCAGCAACTGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTTGTATTA
++
+GD@GGGGAG6GGFFE;GGBFF$DGEDAEDFGCEGEF:DE4*0F
+@test_sample1-1-913/1
+TAGTACCACTAGATGTTAGTGACCCATTGATGCTTCTTAGATCCTTGTAA
++
+GGGGEGFEEGGGFGGFEFGFG@GEFGDFE<57BA@GFFFFF>G>CD7DCD
+@test_sample1-1-915/1
+TAAACTGAATTTGTGAGGACTTTGGTTAAAAAAATTGTTAAGAATTTCAA
++
+G?CGGGGG@GGGGGGGG@F>G=(AGFGGFB/@GFGFAFGGGGG7EGG@CE
+@test_sample1-1-917/1
+TCATGCACTCATTTCTCTCATTCATGTGTTACTGTACAGGGTACGGGCCA
++
+GGGGGGGD6GFFGDGADGGBBEGGGG@
+@test_sample1-1-923/1
+CTAAGATAATTATGTCAGATCATGAATTACGTTAATTAGCTTGATCGTGG
++
+GGG?E;EGEFFGFGDGGGGGF5GE@FG)FFGFGGGF>FBGD=BDGGEBDB
+@test_sample1-1-925/1
+CATTTTTTGATGTATCTGTGGCCCTTTGTATGTTGTCTCCTGAGAAATGT
++
+GEG?GF.GCG7GG=7DCGG:?1F<
+@test_sample1-1-927/1
+AATTACAGTCAACATGTCGTATATTGGGTCCCCTGAATGTAATCCTACTA
++
+19GGGA(CGF;FGCGG@DG?GFFEGBGGFGGGC:@A@FFGG:GFGAGDB?
+@test_sample1-1-929/1
+GGGGGTCAGTGAGCTTAATGTGTGTCAAGATATTCTTTTAGAAACAAGGA
++
+BF<@GGFFGGGD?7GAGFAF&GGFGGGGE?FGGGGG;GGFGGCB;E=GGFGEFAEDFG1D
+@test_sample1-1-939/1
+CAACTCTTATTGTAGCATAAAAACTGTCAACAGACAATACAGAAACAAAT
++
+>0EGCDG*GGCFGFGFGFF7GGGGGGGG40GF9:GEA@=C@GGFD
+@test_sample1-1-943/1
+CAAGGATCTAAGAAGCATCAATGGGTCACTAACATCTAGTGGTACTAATT
++
+GGF>ED&FFFGG:F>GGDGDC@>FFDDE?F+
+@test_sample1-1-945/1
+ACGCTCCCAAAGTTTAGCAATGTCAAGGCAATCTCTCTCTTCTTACATTT
++
+GEFDGGGB@A>GGG7GDG;FFGGCGDG?GGG6?2@GGFFFEFF8>F-FGG
+@test_sample1-1-947/1
+CTGCTGCTCAAAAAAAAAAAAAAGTTAATACAACTCTGAGCACTCAGTAC
++
+GBDC
+@test_sample1-1-949/1
+CAACTCAGTTAATCTAAGTAAGCTCATGGTATCACTTTCCTGTCCTAGAG
++
+GDGF?GFCGFGGGF2EF?FGEFGGFG<9FGGEFGGEGGGGD@F3GDBCBG
+@test_sample1-1-953/1
+TACTGTTTTTAAGGCTGAATAATTTTCCATATCACATTTCATTTATCCAT
++
+FBFGGEGGGFFGFGGEG?GEG6GEDAGFFEG8G=/BFG>8GGAGGGG=GFEG>GFGGFGGGG:FDC.9
+@test_sample1-1-959/1
+CCATCACCAAGTAACTCTTCTGATATTTTTTCTCTTGAGAAAATTAATAT
++
+GGFGEFGFEFGGEEGF5EGGGEDB2GDGGEGGGGGBFD?GF8FDFG@FFG
+@test_sample1-1-961/1
+CAGCATGTTCACAATGGCCAAGATATAGAATCAACCTAACTGTTCATAGA
++
+GGGFG<4CEGF1DCCEGBCGEDGGFGGFC@G@FFBGE9DGGF?9E(=0E3
+@test_sample1-1-963/1
+TATATATGAAGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCC
++
+EAB+FFGFEFGGGFCFGGFFEEGGFGGGE>7GB)
+@test_sample1-1-965/1
+CCTGATTCCAGACAATGTGTCTGGAGGCAGGGTTTACAGGACTTCAAGAA
++
+FFGGC?$GGGGGGGG:?.D@GEFGBGFGGFDGFGGEGGGGF@;EEA:GGG
+@test_sample1-1-967/1
+TGGCTATTATCAAAAAGATGAAAGATAGCAACTATCAGAGAGGATGATAG
++
+D0;G?EFED7GGFCGFGGGF,GEFFGFGBGGGFD@DAGEGEEGD4FFFC&
+@test_sample1-1-969/1
+CCATAAAGATATATAACTTGTTATCAAAAAGAAATATAGCAGTTACAATT
++
+GGAG-FFFGG=EEG4@>GGGGGAG@GGGE"2GGAGFFGF;<
+@test_sample1-1-971/1
+ATTTTTAAATAATTTATTTAATTTATTTATTTATTTATTTATTTATTTAT
++
+GDG>FFGGGD8@G%CG04G8FCGGGGGGGEGGDFFFFGGGDF@
+@test_sample1-1-973/1
+AGCAGCAAGATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTG
++
+GGFB=@DGGEFGGGGGG?=GFGGGFF=DFFGGCGGGG%%BFB?FEDEEGG
+@test_sample1-1-975/1
+AAAACAACACTTTTATCATCTGTTAACTATTCCAAAATTTATCATTCAAC
++
+FGECFGGGEG?GBGGFFFGGGGG:GEBBGGC@GGGGGGD(GC>EG?EA
+@test_sample1-1-983/1
+CAACAGTTAAGCATGGCTCCATGTCCCTTATATTTAGTCAGAACTCGGTA
++
+GG7GBGGEFGGFFGDFGFG8GBGGGGGFAFFCB>=GAGDFGGGCGFGGG:
+@test_sample1-1-985/1
+GGAAGCTTTGTTCTTTCACTCTTCACAATAAATCTTGCTGCTGCTCAAAA
++
+GBGFGGGE6DA;EGGGFFGFGGFFGGG9EGGFGFG2F>EGGGFEDGFF<9
+@test_sample1-1-987/1
+AGTTTCCCAGTGTCACTACTGCACACCCTGGAACAGAACAGGTAGGTCAG
++
+GE9EGGGGCGGEGGFGFGG*FG/GGEBG=GGGADGGAGFG@DD&GG7>GEFD1EFGFGGDBDGGGGGGGGGE+B>56GF=G;EF
+@test_sample1-1-991/1
+GAGGGTATGGTAGAAGCACCATCCATATCTATCTTCATCATCTTCATATA
++
+GE@GGA:GGFF>GGGGGGFF?AGGBGFBGFGFFGGGGGE94CF7C
+@test_sample1-1-993/1
+AGTTACTTGGTGATGGAAAGTAAGGAAAAAAAGAAAAATGCAGGAAATAT
++
+FFFGFCFGFGG?GGGBA@GGF@EGG1DGGGCFG'FGFGEBAF@AFGBEDE
+@test_sample1-1-995/1
+CCTCAACTCAGTTAATCTAAGTAAGCTCATGGTATCACTTTCCTGTCCTA
++
+GFGGG/8FGGGG6ACGGGGDG@DFEGGCFFEFG@EGFBGG;FCEGGE@GG
+@test_sample1-1-997/1
+AAATTCAGTTTACATTAGCCCTGCAATCATGTAGACATCCTGATTCCAGA
++
+9G/GEGGCGGGGGGGEGGGFGCGGGGGFGGGCEEFFGGF;EGG>GEF=2F
+@test_sample1-1-999/1
+AGTGTGTGTCCTGTGCAGTTGAACAAGATCCCACACTTAAAAGGATCCTA
++
+GGEG@GFGGFGGGGGGGGDGGGG=GGCF8GGE;GDFEEAFFF%'%$%=/E
+@test_sample1-1-1001/1
+TTGTATAAATGAATGAATGTATGTCTTCGTGTAACCATTCTTTCCTTAAA
++
+GGFG1G?GDG4G?GGGGDFFB3CGGGG;GGCG9CGFFFFFGG281&F@GF
+@test_sample1-1-1003/1
+TCTTCATCATCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAAGTTG
++
+GFGC?GGGGG-GFFGGFGGFCDFEGFG:$GGFGECFDEEGEGFEE7GF<8
+@test_sample1-1-1005/1
+TAACCCTAGTAACCATCATACTATTCTCTGCTTCTATGAGTTCAGATTTT
++
+FEGG=EGEGGFFGF.G;GEDFGGFG@?GG>9GEG5GGGFFA=CGGDGFFG
+@test_sample1-1-1007/1
+CCTGAATGTAATCCTACTAATTGAATTTTTTTCGTTTTATTAACAATGTC
++
+CGDGFGGGGGG7GGEG?GGGGGGEGG:@FGGF5)G;4DFGCFAECF:C$D
+@test_sample1-1-1009/1
+TTTTAAAAATTTTAAATTGTAACTGCTATATTTCTTTTTGATAACAAGTT
++
+AEG>EGDGFDGGGFGGFGG?GGGGGGF@FFGFD6FGEFGEGFAGGFGG@?
+@test_sample1-1-1011/1
+GAATTTGTGAGGACTTTGGTTAAAAAAATTGTTAAGAATTTCAAGGCAGA
++
+G>AG4EGGGFFGFGFGGFEGFFFGGGGGFEAFGGGGFGGGFEFGFBGFCF
+@test_sample1-1-1013/1
+GTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTCAAGGCAATCTCTC
++
+GAFGFGGFGFFEF40GGFFGF:4AG>@99
+@test_sample1-1-1015/1
+TGAAAATGTCAGCTTTTTTGTTTCTCATTTTTAAAAATTATTTTTTAATT
++
+GGGFGGEGEFFFGGEFFGGGCFGGEGFAGF'GGGFFG0CDG>EFFGG>EA
+@test_sample1-1-1017/1
+AATATGTCAAAGGGATATCTGCACTCCTATGTTATTGCAGCATGTTCACA
++
+GECG9BF@GGGGGFDGEGFGFFFFE@;E>9CGBGFFGGG:?00>@DDDED
+@test_sample1-1-1019/1
+GGGTACGGGCCACTATAAATTCCTTCAGCAACTGGAAAGGAAACTTTATG
++
+GDFGGGFFGG/GC:GGGGGFEG3CGAG@FGEGG+@GFAFEEDEGGGFEDD
+@test_sample1-1-1021/1
+TCTGTGGCCCTTTGTATGTTGTCTCCTGAGAAATGTCTATTCCTGTCCTC
++
+EGGBGGEGGGGG9GGGFEFGG47CCGGGGGBCGDG&4GGF7FGEEC35GF
+@test_sample1-1-1023/1
+AGAATATTCAAACTCACTTTTTATCAAAGAAATGTAAATTTTAAAATATA
++
+$%ADGGGGGFDGGF3GG=G=GGEEE?FCEGG?G8GGGEGFDGACGGGGGE
+@test_sample1-1-1025/1
+AGGGCATCTTCAACAATGGTGGATGACGTCTTAGTTTCCCTCAACTCAGT
++
+GA@9GGG@4CF5E8GFCGGEG;GFGGGF;G:@EGGGEG:GDGG3FE;GAA
+@test_sample1-1-1027/1
+CCATTTTCCCACATTTGTTTTTACTTTAATGGATTAATTCTTCTGTATAA
++
+GGGGFGFCGGF*CE,@GGEF=GGGFGGD7.FGGFAFEDGGGFE3??/D?GGAEFABFBCFF
+@test_sample1-1-1031/1
+TGGTTTTTAAATATAATTTAAACCAAATTTAAAATAAGCATATAAAGACT
++
+GGBGFGFCBCEBGBGEG;FGGEGFDGFAFEEFFGGFGGDE-=FGGFGD>8
+@test_sample1-1-1033/1
+AAGAATAATGCTCTGAGGATTGAAATAATTGGAGATAGGAGGAAAACAGA
++
+EBFGGGGGEGFGFFGGDE@FFGGDBGDGGGGGEGGFFFGG$DGG?8$1CG
+@test_sample1-1-1035/1
+TATACATTTTCTTCCCAATGATTAACAGAGTTAATTAGTACCACTAGATG
++
+FEGF=GGFGGGGGGG@AGGGGFGGE2FGGFFFAF@'GGFGFF>GGC6:4F
+@test_sample1-1-1037/1
+AGTAGAATCAGCTCCAAGACAAAAGTATATATACATTTTCTTCCCAATGA
++
+CGFGFFGGDGG;FGDGGG?/GGFFFGGGGF@EEGFGGFF6FGCEBDA)F@
+@test_sample1-1-1039/1
+TTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTT
++
+EFFFG<2GGFGGF?GGGFFGGF>'GCG>GGEEG19GGGG((,FG<9GDGGFC.DFFFGGA9*CEE12
+@test_sample1-1-1043/1
+GCAGACAACATCAGCTTCACAAGTGTGTGTCCTGTGCAGTTGAACAAGAT
++
+GGFGCGFGBEGGB=EGFGG@=FGGGGEGGGGGFFGGGE7FGFDFDG39C=
+@test_sample1-1-1045/1
+TGACCACGATCAAGCTAATTAACGTAATTCATGATCTGACATAATTATCT
++
+GFGGFFFE?GFGAGG7FGGG7GDFG:GGF?BG=9FGGGDGGGC8FBBBFGG
+@test_sample1-1-1049/1
+GCTGAAGGAATTTATAGTGGCCCGTACCCTGTACAGTAACACATGAATGA
++
+GGEGEEFF1FDGFDFGFD83.E8GFEGGFFEFC@F
+@test_sample1-1-1051/1
+GCAATAACATAGGAGTGCAGATATCCCTTTGACATATTGATTTCAATTCC
++
+GEGGGFGCF?GEDEDFGDGGCEGGG5BEGDGGGGEGGFFDDFEFG7?%3,
+@test_sample1-1-1053/1
+ATTTATCCATTCATCTGTCTATGAACAGTTAGGTTGATTCTATATCTTGG
++
+E>?GEDGGGG:EDGGGGGEFGCFGEGF?GGFFG7BDGGGCFGGFDGGGGE
+@test_sample1-1-1055/1
+TATGCTACAATAAGAGTTGAGTAGTTATGACAGACACTCTAGGGCCTGTA
++
+=DCFFEFGF-7GFGEGGCGFG0:EFFGGGGFE.877BFFG>FEGGG=G@'
+@test_sample1-1-1057/1
+TGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTTGTATTAACTTTTTT
++
+GAFGGGEC,)).?GFG=BFDFGFEGFGGFEDEGGGGFGGA:<7(EFG
+@test_sample1-1-1063/1
+ACACTGTATTCCATAAATATATATAATTTTTGTCAATTAAAAAATAATTT
++
+G4EEEGEGGGEBGCGCGEGFGGGGG1F;GFGC8D;GG&AGAGFFEFGFB0
+@test_sample1-1-1065/1
+AAACTTTGGGAGCGTTTTTCTGACCTACCTGTTCTGTTCCAGGGTGTGCA
++
+EADFFGGGEGBFFGFGGGBGGEFDGGFAFCFGF'DGFGGFG8<>5GFEG>GGFEEFFDBFA1FEEE
+@test_sample1-1-1085/1
+TTTTTAAAAATGAGAAACAAAAAAGCTGACATTTTCAGATTAAAAAAATT
++
+G@G:DGEGGGFGEFGGFEGFFFEGBEEDBEEGFDFGGG=FGGFDAFGFF1
+@test_sample1-1-1087/1
+AAATTAAATAAATTATTTAAAAATCGGCAGAGGACAGGAATAGACATTTC
++
+;EGDGCGGGGGEGDFEFFG7GGGGGGBGG>GEGGGFG@G5FDFDGFG=DF
+@test_sample1-1-1089/1
+TACAATACATTATTAATTACAGTCAACATGTCGTATATTGGGTCCCCTGA
++
+GFG:GGGGGGFGGGGGGGGGGGC>GFGDGGFEDGFEGGFGF2EGD?BFEF
+@test_sample1-1-1091/1
+TTTCATCTTTTTGATAATAGCCATTCTAACAGGTTTGAGGGGACATCTCA
++
+GGEGDGGDGG9FBGG@GGEFFGGEGFFBGEG9;FGGGFGFEFGG5>GGC.
+@test_sample1-1-1093/1
+CCTACTTCTGGGTATATAGCCAAAGGAATTGAAATCAATATGTCAAAGGG
++
+FGBEBGGGGBGDFFGGGDCGFGFFGGGD""G2E>GFB8EFBED?FG9CFGDE<>@F5DF@DGF
+@test_sample1-1-1099/1
+TCTATCTTCATCATCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAA
++
+GG@GGGG3BGGGEGFGGGFF8DFGF7CGFGFGDEFGEGFFGEEDGD1A6GGGEEGG;GG
+@test_sample1-1-1109/1
+AGTTTAGCAATGTCAAGGCAATCTCTCTCTTCTTACATTTCCCTTCAACC
++
+EGGFGFGEGFFFGDGBGFDG;GFGDGFEGFF>=GFEEGGGD84FEGEEDE
+@test_sample1-1-1111/1
+AATTTAAAATTTTTAAAAAACGTCTTTTTGAGGTTCGTACCTCACTTAAG
++
+G?GGGGEGDAFGFGGFGGGGEFFFGGD-BG@DGGFDGCGGFGFF6FFFD=
+@test_sample1-1-1113/1
+TGATATGTTATTATTTATGAAAACACAATAGCAATGCTATATTTTAAAAT
++
+BGGDGGGFG?FGGGG2&9AGCGBGDGG@EFE9GCFDDGG@5FGG:FFGGGE:FBDF<0EGEFEAB2>CG+
+@test_sample1-1-1119/1
+AGTGACCACGATCAAGCTAATTAACGTAATTCATGATCTGACATAATTAT
++
+DG8FG@FBFGCGGGGG3FGGFFFGFFFGFGADFFDGF%/$FFC=FB9GF:
+@test_sample1-1-1121/1
+GTGACCCATTGATGCTTCTTAGATCCTTGTAACCAGGCCTCCATTGATTT
++
+FGEGC?GGF8G7FEGGDGBGGEGFG<;=C3EC
+@test_sample1-1-1131/1
+GTGACCCATTGATGCTTCTTAGATCCTTGTAACCAGGCCTCCATTGATTT
++
+FG>CD4GDCDGGF/CFGGGEFFFDCGGF;GCGGEGFFFBGGGGEBFGDGE
+@test_sample1-1-1135/1
+AAACTTCTTCCGTAAAAGGCCAAAAGTAAATATTATAGGCTCTACAGGCC
++
+FGFDFGGCBGGGGFCCFGG@?GFG*%FAGGG?5$AADE7FGGG*'C>9F4
+@test_sample1-1-1137/1
+TATGAAGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCCTCTT
++
+GFEFGFGFEGGGGEFFGFEFF?E8GE@GAGGCCFC
+@test_sample1-1-1139/1
+CAATTCTGTCATTTGAGACAACGTGGATGAACCTAGAGGACATTAAGCTA
++
+GGC>E?GFGGBG=GCGGGEGAG@GGEGGGGGDF8BEA=FFFFGEEDC5C>
+@test_sample1-1-1141/1
+TTTCGTTTTATTAACAATGTCCCATTTCCTCATCCTCTCCCTGCCAGATA
++
+CGGEGBG;G>GGGGF@GGDE;G>EGFGFEFGC44GG9
+@test_sample1-1-1143/1
+TCTTGCTGCTGCTCAAAAAAAAAAAAAAGTTAATACAACTCTGAGCACTC
++
+FG?BGDGGFGDGGFGFDF.GEG,;FE:GG:CCEE
+@test_sample1-1-1145/1
+CCATGAATGAGGGCATCTTCAACAATGGTGGATGACGTCTTAGTTTCCCT
++
+DGFGBGBFGC=GEEFGGFGGFGGBGEGECGFDGFDGFCGGG(GDDGCFFF:GG.DGGFG
+@test_sample1-1-1149/1
+CTAATGTAAACTGAATTTGTGAGGACTTTGGTTAAAAAAATTGTTAAGAA
++
+E@DGDGGBGGGGGEGGGGFEGG8GGE9GGFGGEGEC;G8B3EEFGGGGF5
+@test_sample1-1-1151/1
+TCCAGGGTGTGCAGTAGTGACACTGGGAAACTGATCATTTTGATTTCTGC
++
+>GF4GGFGFDF>GBGGGFGGGGDCFGFFFGEEAGCGF4GFGGE>0G1?EE
+@test_sample1-1-1153/1
+TTATTTATTTATTTATTTATTTATTTTACTTTTGACATATTTGTGTTACT
++
+ACFF6G>GCFAF?GGGGCGFAEG@-?G:DGFFEGGEFGDFFG2BDEGGGG
+@test_sample1-1-1155/1
+AAATGTCAGCTTTTTTGTTTCTCATTTTTAAAAATTATTTTTTAATTGAC
++
+GGGEGGC.CFGGGGGE@FGFE:=EFFFFDFBFGG>EFA=FFBGDFDE4DF
+@test_sample1-1-1157/1
+TCCCACATTTGTTTTTACTTTAATGGATTAATTCTTCTGTATAATTTTTT
++
+*F>G??E<=FFGFGFFGG;FGGAGGGCACGECDCFAGG=F>"=GFGG7FG
+@test_sample1-1-1159/1
+TTCTAAAAGAATATCTTGACACACATTAAGCTCACTGACCCCCACACCAT
++
+)?GF/GFG?GGGGDFAEFGGGGGFFFCGGFGGG?=G:FGGFFGE9-FGF;
+@test_sample1-1-1161/1
+TCAAATTTACCACCTCCAGATCTAGAGAAACAGTTTTGAGCCCTTTTATT
++
+GGFGGGEGGGCF*FCGGFGGF4@GGFCGCF@?<@
+@test_sample1-1-1163/1
+TCTGTTGACAGTTTTTATGCTACAATAAGAGTTGAGTAGTTATGACAGAC
++
+BGGFFEGGGGG=GFEEEGGFGAFGGEFFE@EGG?*FDCGGGBDGGFGFFDGGFE8
+@test_sample1-1-1169/1
+TATAAATTCCTTCAGCAACTGGAAAGGAAACTTTATGTACTGAGTGCTCA
++
+GGGGFDGFGDGFFGGGEGGFGDFG4GB>=:G@EF=GE>FF;FFGGGF*,F
+@test_sample1-1-1171/1
+AGATGATGAAGATAGATATGGATGGTGCTTCTACCATACCCTCTTCCTCT
++
+GFGGG@GGGGA?@GGG>C?G;BBGD?GFGGCFGG33EGF2BGFC>GEGFG
+@test_sample1-1-1173/1
+GGAGTAAAGATGCAGATGAAAGTTGAGAAGGTAAGGTTCTTGAAGTCCTG
++
+GGD:G6GGGGGCFG9GGG?FBFGEF?9F?FFFGCGGB=EFFFD<>
+@test_sample1-1-1179/1
+CTCTAGGGCCTGTAGAGCCTATAATATTTACTTTTGGCCTTTTACGGAAG
++
+4GGGGGFGFGDBFGFGEBGGGGGAGGGGFG:GGBFGDGGGFGGGFGG=1>
+@test_sample1-1-1181/1
+ATTGCTAATACACTATATTGTAAATGTTAATATGAGGTAATATATGTGTT
++
+GGG?GFEGGFGGGGGGGAGGDFF7>FG8GGGGEGDGFGGE?@FGGBFF@G
+@test_sample1-1-1183/1
+CATGAATATTTTGAACAGTGTGACTTAAGTGAGGTACGAACCTCAAAAAG
++
+DE$)GEGGGFD:EFGGG=BG?FGGFFFF;FFGDCF@F141FFEGE:G1E*
+@test_sample1-1-1185/1
+AGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGGCAGGGAGAGGATG
++
+GGGDGGEFGCGGGGFEGFFDCGF=DF3GGFFG?G?GGEG@EGBD@FGGGF
+@test_sample1-1-1187/1
+GTTTTTATGCTACAATAAGAGTTGAGTAGTTATGACAGACACTCTAGGGC
++
+89CAGGFGGF=FGGGGGEG?;CFGFGGGGBGB>BFFEGD@EEGFG68FF8
+@test_sample1-1-1189/1
+TTTGGCTATATACCCAGAAGTAGGATTCTTGGATCATGTGACAGTTCTAT
++
+EGGFDDCEGGGEGGGG7GGGEGDF:F3CFFA9FDGGFGAGGGED1DEGA<
+@test_sample1-1-1191/1
+ATGTTGTCTGCAATAATGCTAAGAATCTTAAAGTGTTGAATTAGAAGTTA
++
+EGFGFGCGGFECFGGFFG.GGGFDGGGGFD:DAFFGGAFGEGFFG'FGE,
+@test_sample1-1-1193/1
+ACTCCCAACTATATATGAAGATGATGAAGATAGATATGGATGGTGCTTCT
++
+FGCAGGF%>GGFFFEGCGGFGGBGFEG?G*D6FGGGGG-?F+GBFCFGB8
+@test_sample1-1-1195/1
+GACAGGAAAGTGATACCATGAGCTTACTTAGATTAACTGAGTTGAGGGAA
++
+GG3EFGGGGGGGGG6GGGGEFGG9EEFCGGEG2FCEFG?FFA1GEEEFGG
+@test_sample1-1-1197/1
+ACATACCGAGTTCTGACTAAATATAAGGGACATGGAGCCATGCTTAACTG
++
+GCEFFGGFGGECGFD=,G@GEF>GGGGGGFGG)GGGGGGGFG=A9GGG1FC<8FE<;E9AG2##FC6
+@test_sample1-1-1203/1
+CTCAAAACTGTTTCTCTAGATCTGGAGGTGGTAAATTTGACTCATGGGAC
++
+7?E>GEEG?GGGGFGGGFFFFGGGGFGGFGGF:%FG?CFC>B?FAG7%)F
+@test_sample1-1-1205/1
+CATCACATTACACACCATAAAGATATATAACTTGTTATCAAAAAGAAATA
++
+FGFG99GGGEGG?GFFFFGGEEGFFDFEFBGGFFCGDG4=CDEGGGAGB8
+@test_sample1-1-1207/1
+GATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTGGAAGGGGA
++
+GFG?GGEFECFGBGGGEGGGGG>DFGG;FBGDBGGGFDFFDAG)@@F8=,
+@test_sample1-1-1209/1
+GTTTTAAGGAAAGAATGGTTACACGAAGACATACATTCATTCATTTATAC
++
+GG@2A>GGGEEFGEG7FGGGGGG@AGGGFGGEGGCDGGF?GBFG:BGFG3
+@test_sample1-1-1211/1
+AAAGAATGGTTACACGAAGACATACATTCATTCATTTATACAACACATTT
++
+GFEEE:F6GFFECGGCG
+@test_sample1-1-1215/1
+TGTAATCCTACTAATTGAATTTTTTTCGTTTTATTAACAATGTCCCATTT
++
+FGG5FGDF<5GGEFFGGGFCGFGGGG1FCG?D.FGGBDGFFFFGEFFDGF
+@test_sample1-1-1217/1
+GTAATCCTACTAATTGAATTTTTTTCGTTTTATTAACAATGTCCCATTTC
++
+GFCG83G?FGFBG6GDGGG@GDGGGGG=8@>GGG@;FGGCGGF@EE4B>B
+@test_sample1-1-1219/1
+ACTATCAGAGAGGATGATAGAAAAGGGAACCCTTGCATCATGTACAAATT
++
+FGGGEGGGGGFFGFFGEGDD6>GEGG=G3DGFGG8G<.E
+@test_sample1-1-1221/1
+AAAATGTATATATACTTTTGTCTTGGAGCTGATTCTACTAGAAAGCAGAA
++
+G=GEEFGGGEGFGGFGGGFFG=D3;GGFEEGG6FFF8GE2BGFF9CG2.=
+@test_sample1-1-1223/1
+AATTTATAGTGGCCCGTACCCTGTACAGTAACACATGAATGAGAGAAATG
++
+FGEGGCGEGGFECDGGGF6EGGG3EGFGGDEFG;@FGEG3GD?GEGFEGGFAEG8CGC=
+@test_sample1-1-1229/1
+GGACTTCAAGAACCTTACCTTCTCAACTTTCATCTGCATCTTTACTCCCA
++
+GAGGFFGGEBCAFGGGGGFGCGG8EEFFGEGFCGG;GGEFB0;:1GGGFC
+@test_sample1-1-1231/1
+TAATTCAACACTTTAAGATTCTTAGCATTATTGCAGACAACATCAGCTTC
++
+B>GGGB6GGAGGGG.DEG4GG3DGGGGGGGGBCFFGGFDCGG:GGFGFCF
+@test_sample1-1-1233/1
+TTGGTTTAAATTATATTTAAAAACCATTTTCCCACATTTGTTTTTACTTT
++
+AGAGEGG.G?AGFGEFGGGECFGGFG;EGD?GEGEGF?GFG:E@GF?EFGGGDG?GE5GGGG9F0?
+@test_sample1-1-1241/1
+CAATATAGTGTATTAGCAATTTTCAAGCATACAATACATTATTAATTACA
++
+GCGFGGGFGGGG?@GFGG:GGGGGEGD<7F)>FGGGGGFCEDC@FGBG
+@test_sample1-1-1243/1
+CCTTCTCAACTTTCATCTGCATCTTTACTCCCAACTATATATGAAGATGA
++
+FEG:,EGFDFGFA?EGGGEGCGG68FGBD:DF9AGGGE%?=>@G76@@??
+@test_sample1-1-1245/1
+AAAAGATAACCTCTTGAATGGGAGAAATATTTGGAAACTACTCATCCAAC
++
+@FG<9GGFGGGGG:F$EFCGG6EGFFCFFG8DA
+@test_sample1-1-1253/1
+ATTTAAAAATCGGCAGAGGACAGGAATAGACATTTCTCAGGAGACAACAT
++
+GGEGGFDGGFDCGGGFFFGGGFD>FGGGEG<24CDFGGGF.8EG@G;GGC
+@test_sample1-1-1255/1
+TTTCCTCATCCTCTCCCTGCCAGATAACCCTAGTAACCATCATACTATTC
++
+GGFGGDG5;GFGDGEGGGGGBF/>==
+@test_sample1-1-1261/1
+ATAGAGTGAAAAAGATAACCTCTTGAATGGGAGAAATATTTGCAAACTAC
++
+72FG6EEGGFGCBGGGGGF@ECEGFF?%2%6GGGGGEFGF;7FFF>GF,4
+@test_sample1-1-1263/1
+TGACCACGATCAAGCTAATTAACGTAATTCATGATCTGACATAATTATCT
++
+FGG=FGFGG;3FCFAGGGG?GDBGGGGD>FCFGCGGDFFFGFGGFGFGGGGG@FGCGGFFE;?GG9GG?EDG
+@test_sample1-1-1269/1
+ACTTCCCTGACAAATAGTGATGTTGAGCATTTTTTGATGTATCTGTGGCC
++
+GGE>GGGG=DGD1FGGDGEGGGG9$.GGGG@E<@EFFFGAAEGCGC4@C5
+@test_sample1-1-1271/1
+CTGAACTCATAGAAGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGG
++
+F@FEGG+;GG@AGGF:GCGGFFGDFG?GGCF>FF>AGGFGGFB8=GGFE?
+@test_sample1-1-1273/1
+AATACAACTCTGAGCACTCAGTACATAAAGTTTCCTTTCCAGTTGCTGAA
++
+AGGGE5GGEGEEGF=F4BGGGGFEGG1?FFGGFDGEG8=GDFGGDGC&%$
+@test_sample1-1-1275/1
+CTGCTGCTCAAAAAAAAAAAAAAGTTAATACAACTCTGAGCACTCAGTAC
++
+GGG<7GGFCEFGGGGGGEGGG'GFG@GFGGEGFG#E;DGACDAGFDC@D)
+@test_sample1-1-1277/1
+ATTCCTTCAGCAACTGGAAAGGAAACTTTATGTACTGAGTGCTCAGAGTT
++
+G9EEGFGGG;GDEGFFFGGGFBFDG@GGFGGDGG7GFFGBC:=A1GFGFGGGEEEEFGEEGFGGGG
+@test_sample1-1-1281/1
+AATAAATCTTGCTGCTGCTCAAAAAAAAAAAAAAGTTAATACAACTCTGA
++
+E9=GGG8GGGGG1?DGG>=GGGC>GGFF;DF4G;FGC>F7GFB>DGGG>4
+@test_sample1-1-1283/1
+AGTTAAGCATGGCTCCATGTCCCTTATATTTAGTCAGAACTCGGTATGTT
++
+GFEFGGGFGGCDGGGGEGGGBFD;F6FGDGG5FGFF@GG/GG9CA$GBGF
+@test_sample1-1-1285/1
+ATTATACAGAAGAATTAATCCATTAAAGTAAAAACAAATGTGGGAAAATG
++
+EG@G;CGGGGDFF@GG?@GFEGFEGGGFGGG4FGGGGBBBFFFDGCGGEG
+@test_sample1-1-1287/1
+GACATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGACAAATATTGCAT
++
+GGGF2G=D3GGGGGF7A>GG0FCEGCGG>FCGGGGGGFF
+@test_sample1-1-1289/1
+AAGGTTGAAGGGAAATGTAAGAAGAGAGAGATTGCCTTGACATTGCTAAA
++
+F<7GGGDEGGEFGGGGGGGFGGGFG=CFGAC+7EG*FFFF@>CG)DB?F
+@test_sample1-1-1295/1
+TAAATATAAGGGACATGGAGCCATGCTTAACTGTTGAGCAAAATAAAAGG
++
+F:FF,D>GGFGGGG1GGGFFEFDGFGGFFGGB.8GCGFEEFEGGG>G@FG>.AF@GD@;G8CGGEFFGDGFD%DGGGCEFGFACAF
+@test_sample1-1-1301/1
+GGCCATTGTGAACATGCTGCAATAACATAGGAGTGCAGATATCCCTTTGA
++
+EGF5EEGGGGG=A1GGFE8GGGGBGGGGEGGFFFG>>@=@7
+@test_sample1-1-1309/1
+CATTATTAATTACAGTCAACATGTCGTATATTGGGTCCCCTGAATGTAAT
++
+FGEEFGGGGBCDGD?BA
+@test_sample1-1-1311/1
+CTATTGTGTTTTCATAAATAATAACATATCATGGATGAGCCTGTGAGGAA
++
+FGF>FFEGGFDGG<=>EGA?GG5E;GDGCGGGGGAEEFFFGGGFAFEFGF
+@test_sample1-1-1313/1
+TGTCAGGGAAGTACTAATTAAAACCAAAATGAGATGTCCCCTCAAACCTG
++
+FFGEGGG0EFFGFGAFGGGGGFD:@F@GGG;GGBAGECCGGDEFGG19CF>G0?78D>2
+@test_sample1-1-1319/1
+TATTGTCTGTTGACAGTTTTTATGCTACAATAAGAGTTGAGTAGTTATGA
++
+EEGGGADFGGGDFGFDFGGGFDGFGFEFGG7FFGE8F=G4G;FGF5EFE0
+@test_sample1-1-1321/1
+TGATAATAGCCATTCTAACAGGTTTGAGGGGACATCTCATTTTGGTTTTA
++
+DBGDFGFGGADBGGGG@GGGGGFGDFFGFGGAG.CFE7?FGG@6F='/FB
+@test_sample1-1-1323/1
+CCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGGTTACAC
++
+GAGDGGCGGDGFGGGGGGBGFGGG+F3GAFGGGGGFDF$GGE@G@F+DGG
+@test_sample1-1-1325/1
+ATTTTAAAATATAGCATTGCTATTGTGTTTTCATAAATAATAACATATCA
++
+FFGCGFFFEFGGFGFGGGGGGG?GGDFFEEEGGFFEBD@CE=D3DGFEB;
+@test_sample1-1-1327/1
+AATTAAAAATAGAACTATCACATGATCCAAGAATCCTACTTCTGGGTATA
++
+FGDGGGGFEFG=GGDGGCG@GBFFDFGDEGGCEEEF&DEG@;FGFGGF@(GGGG7GGGFFG6FBF7CDEE=*>
+@test_sample1-1-1335/1
+GAAACAGCAGAGCATTAAAAAAAGTGTAGGATCCTTTTAAGTGTGGGATC
++
+GEFF3FFEDEGGFGGAFGGGFGFAGEFFGFFGGGGGGFCB;7FCE48$;G
+@test_sample1-1-1337/1
+TCCCTCTAGGACAGGAAAGTGATACCATGAGCTTACTTAGATTAACTGAG
++
+G)>GEGFGED?FGFGGCFGGEGDFGEDCFBGDFFCGFFDF0GGF9DA4;F
+@test_sample1-1-1339/1
+GGAAAATTATTCAGCCTTAAAAACAGTAGGCAATTCTGTCATTTGAGACA
++
+DFGGGGGB9GEGCAGGGDEGGGFGF$-F;97A0DFCDG2GGGGFE>BCG<
+@test_sample1-1-1341/1
+AATTTATTTAATTTATTTATTTATTTATTTATTTATTTATTTTACTTTTG
++
+GGDGGFGGGDGGGGF;%GF?CGGGGGFGGGGDGFFGB:EE1GFGE6=+9G
+@test_sample1-1-1343/1
+TTGAAATCAATATGTCAAAGGGATATCTGCACTCCTATGTTATTGCAGCA
++
+F;>GGEFBGGGGFGGGGGGFFGEG7/4GGFGFGGFGFG1=GGGFFFF::
+@test_sample1-1-1349/1
+CTTGCATCATGTACAAATTAAAAATAGAACTATCACATGATCCAAGAATC
++
+C/EG=9DGGBDGG@GGGGGGGGDGEEFCDFGCGEFGDDEF,:ADGD.(D=
+@test_sample1-1-1351/1
+CTAACAGGTTTGAGGGGACATCTCATTTTGGTTTTAATTAGTACTTCCCT
++
+GGFFGGGBBGGGFG@F.@DAEGG@@GFF8FGGGG9$@EFGFGCFFECA@<
+@test_sample1-1-1353/1
+CTATCTTCATCATCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAAG
++
+G=FGG5GGFGGFGAGECFFFBGGC@F@FGGG<2
+@test_sample1-1-1357/1
+TCTGAACTCATAGAAGCAGAGAATAGTATGATGGTTACTAGGGTTATCTG
++
+GDFCFF=GGGGGG,@GGGGGGFDFFFGGG7F/7FFF@FBGEGFCFGG4GF
+@test_sample1-1-1359/1
+ACATCCTGATTCCAGACAATGTGTCTGGAGGCAGGGTTTACAGGACTTCA
++
+GGGEFGGGF5GAGGGGGCFFGGF8EEGGG<>E0EEF
+@test_sample1-1-1361/1
+ATCATCTGTTAACTATTCCAAAATTTATCATTCAACACTGAAAATGTGTT
++
+FG8GG=GGGGGFGFFEEE?GGGGGFF@GG>16E@E=BDEGGG
+@test_sample1-1-1363/1
+CATGATGCAAGGGTTCCCTTTTCTATCATCCTCTCTGATAGTTGCTATCT
++
+GGG8FG3FFGGGGGFG8#%EGGEE8F+FFFF<=8C?DGGGG
+@test_sample1-1-1365/1
+TACTAGAAAGCAGAAATCAAAATGATCAGTTTCCCAGTGTCACTACTGCA
++
+1;FFGFDGFEGGG7BE0>FGDG:,FGCFGGF5DDG
+@test_sample1-1-1367/1
+ATATATACTTTTGTCTTGGAGCTGATTCTACTAGAAAGCAGAAATCAAAA
++
+FFFEGGGFGCGGFFGF
+@test_sample1-1-1371/1
+TGTTAGTGACCCATTGATGCTTCTTAGATCCTTGTAACCAGGCCTCCATT
++
+AGGFAFGGGFGGCDGGFGGGGDG(CG@F=GGDF9:FFEGDG((EGGFAD6
+@test_sample1-1-1373/1
+CATTAAAGTAAAAACAAATGTGGGAAAATGGTTTTTAAATATAATTTAAA
++
+E?A&E4CGGGGDBGEFEF;GEGEGGGD5GGF?*GGD9F<3F?BFFF8F@8
+@test_sample1-1-1375/1
+ACATCTAGTGGTACTAATTAACTCTGTTAATCATTGGGAAGAAAATGTAT
++
+GFGEFGAFF7GGGGGGCF=GEFFF;DGFF9GFGGGG0@4-2GGFDEGGC8
+@test_sample1-1-1377/1
+AAGGAATTTATAGTGGCCCGTACCCTGTACAGTAACACATGAATGAGAGA
++
+FGBGDFGFCGFFGEFGFFFFGGB/EFADGEFFGGGFFGDEFFFDA159BE
+@test_sample1-1-1379/1
+GACATTTCTCAGGAGACAACATACAAAGGGCCACAGATACATCAAAAAAT
++
+FDGGF@G9GGBGGFGG?FFGGCEGG@AGAGGGGEGFCGF8B.(FC9BFFGFGFGGGGF6
+@test_sample1-1-1387/1
+TTTTAAAATATAGCATTGCTATTGTGTTTTCATAAATAATAACATATCAT
++
+GFGG@GEFEGGD?GGGGGEG?EG9GFDDGGCBFGFGGFGGG:GGG@:F.E
+@test_sample1-1-1389/1
+AACAAAAAAGCTGACATTTTCAGATTAAAAAAATTATACAGAAGAATTAA
++
+GGGGEEGGGBGEGGDF1FGDEF?%GGG7EGCGGGGFG8G0;78$4GAFEG
+@test_sample1-1-1391/1
+CAAAACAAGAAATCCACATAAAAAATAAACATATGAAGAATATTCAAACT
++
+GGGGFGF7GFB-&EGGFDGGG@CGFCGBGGGGGGGGFBGGDFCGGFGGGG
+@test_sample1-1-1393/1
+CATTGTTGAAGATGCCCTCATTCATGGTGTGGGGGTCAGTGAGCTTAATG
++
+EDEGGFFE;G2FCG>GGGGF%/FFFGGGGDGGGFGGG;FDGGGGGGGFDG
+@test_sample1-1-1395/1
+ATTAGTACCACTAGATGTTAGTGACCCATTGATGCTTCTTAGATCCTTGT
++
+FGGGDGF@GGGGFGGFFGF-FFGFFGFGGFFBGE=+>FAFE?EEEGGFFE
+@test_sample1-1-1397/1
+TGAACCTAGAGGACATTAAGCTAAGTGAAATAAGCTAGACACAGAAAGAC
++
+E@GGDC.8>GFAGGFGDGGGDGECGGAG;FCG.CDFGGGGGGGGFG'+0E
+@test_sample1-1-1399/1
+TTTTACGGAAGAAGTTTACTGACCTTATCCTAGATCAAGGAAGTTTGGCA
++
+FGGGGFGB@5GD;BGBEGGEFGFAG?GC?4GGFDAFEAFCFEGGEF>DCA
+@test_sample1-1-1401/1
+GTATTGTATGCTTGAAAATTGCTAATACACTATATTGTAAATGTTAATAT
++
+DGEEGCDG?GGGBEG/FGGG?DGGGGGGCBBFGF-G4GBGGEBEBDB-8G
+@test_sample1-1-1403/1
+AAAATTTTAAATTGTAACTGCTATATTTCTTTTTGATAACAAGTTATATA
++
+G=-C>GF>BGGG5FGF8AGGF4GFGGAEGEG2=GFEFF@:7:9&C9FGGC
+@test_sample1-1-1405/1
+GTCTATGAACAGTTAGGTTGATTCTATATCTTGGCCATTGTGAACATGCT
++
+BFFG8FGEGGG5E@GGGGFG0GFGGGCDGFGGBFBGGGGEGAGGE<.D@F
+@test_sample1-1-1407/1
+TATTTATCGTGAAACAGTGACCACGATCAAGCTAATTAACGTAATTCATG
++
+ED5BGGGGGGCFG?GGFFGGEF>FGFFFGFGAGGGDFDFGFBF786GEGG
+@test_sample1-1-1409/1
+GCTCAAAACTGTTTCTCTAGATCTGGAGGTGGTAAATTTGACTCATGGGA
++
+GDDG=EF%()GGGEFGFGEGGF=GGGGFGG2GGGB@EA8AGFC1C
+@test_sample1-1-1415/1
+AGTTCAGATTTTTTAGATGCTAAGTGAGATCATGCAATATTTGTCTTTCT
++
+GFGFDG9GEAGGGDDEFG?E+FGFGGFEGGGGFEGFDFF)CF0A9BAB.G
+@test_sample1-1-1417/1
+CAAGAACCTTACCTTCTCAACTTTCATCTGCATCTTTACTCCCAACTATA
++
+FGFGGGGD=FAEB?FFGG@GF5BEDE=FGGGGEEGGFGFG8FG>?B?C9A
+@test_sample1-1-1419/1
+CACCATAAAGATATATAACTTGTTATCAAAAAGAAATATAGCAGTTACAA
++
+GF5GBCG@GDGGGGCDGGBGBGGGEGE?EDGGEC?C:FGGF?)/E
+@test_sample1-1-1425/1
+TTGGGTCCCCTGAATGTAATCCTACTAATTGAATTTTTTTCGTTTTATTA
++
+FG=FGCGGEGGGGGG>FDGGGFGGFGGGG?FDC;GF?FFAGGGGF?DECF6GGGG)@FFGGFFEGGGFGGGF@GFGGF%@A3=27GF
+@test_sample1-1-1431/1
+GAAATAAGCTAGACACAGAAAGACAAATATTGCATGATCTCACTTAGCAT
++
+GGGGFG>GGBGGG'@FGGFGGGFFGFGGGGGGGG;FGGGG351G)GGAF<
+@test_sample1-1-1433/1
+TGTCAGATCATGAATTACGTTAATTAGCTTGATCGTGGTCACTGTTTCAC
++
+G:FBGGGGGFGGEFFD?CFFGGF3GGGGC8>GECGGG@7@GG:EFGAEFD
+@test_sample1-1-1435/1
+CAACTGCACAGGACACACACTTGTGAAGCTGATGTTGTCTGCAATAATGC
++
+GGGFGGGEG>FFGFG>C=GGGGGF?*GGGF;GGDEFEEG9GFD2CGFD6G
+@test_sample1-1-1437/1
+CAATGCTATATTTTAAAATTTACATTTCTTTGATAAAAAGTGAGTTTGAA
++
+GFFGBGGFGGAFGDGGGDFEG@GCGGGD@>FFGFGGGE72E>9EDA?FGG
+@test_sample1-1-1439/1
+TACTTCTGGGTATATAGCCAAAGGAATTGAAATCAATATGTCAAAGGGAT
++
+GEFGGBGGGGGGFF>EFCFFGDGFFE?GC1EF.ECC3GEGGGCE%+9F>D
+@test_sample1-1-1441/1
+CTGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGT
++
+GGGCGGGFG5GGFGFCEE@GGGGGCGBFFFGG>DGGCF?AC3EG?4GCAD
+@test_sample1-1-1443/1
+GTTCCAGTGAAACTTTATTTACAAAAGATTTGTCCCATGAGTCAAATTTA
++
+GG8D@FG)EBGGEG?EF@EF>BGCGBGFGG/GGGGGGGGFFDFEDDCG,9
+@test_sample1-1-1445/1
+TTTTGGTTTTAATTAGTACTTCCCTGACAATTAGTGATGTTGAGCATTTT
++
+FGGEGGEGGEAGF?GCG9FGDCGGFAF>EGGFFFEEFGFDCEFGDB.'@A
+@test_sample1-1-1447/1
+GGAACCCAGTCACACTCATTTGTTTCTGTATTGTCTGTTGACAGTTTTTA
++
+GFGGEGGGFGGFEEGFGGEFEGDDGGGGCF3CDE?GGCEDAEE@DB1E';
+@test_sample1-1-1449/1
+CCATTCAACACTTTTTTTTTTACTTCATTGGATAGGGATGACTGAAAACA
++
+GG=FGEABGGFGGG@3G@6BGGBGFDGGGGGFBEGFGGGG1)FCB?D?-A
+@test_sample1-1-1451/1
+GATTCTACTAGAAAGCAGAAATCAAAATGATCAGTTTCCCAGTGTCACTA
++
+FGGGGFE?EEGFGF?FGFGGFFGGEEDBGGFGGFGG?=GGGE>BFGG$AF
+@test_sample1-1-1453/1
+GCTGCAATAACATAGGAGTGCAGATATCCCTTTGACATATTGATTTCAAT
++
+G>GAGGBG5EG##GGGBBFGFGGGEFG&FGFGDGED+FG
+@test_sample1-1-1455/1
+ACATTTGTTTTTACTTTAATGGATTAATTCTTCTGTATAATTTTTTTAAT
++
+GGGGGGFGFGG=6FD2GBGFAGFGGGGF+CGGGFGGGG7GGGCE#@BFFF
+@test_sample1-1-1457/1
+GAACTCATAGAAGCAGAGAATAGTATGATGGTTACTAGGGTTATCTGGCA
++
+F@GDGED;GFGGFGG#@GGGGEGG'GEFE>GF8$%8D>G8E.2A=>GGG=
+@test_sample1-1-1459/1
+TTAAAAAACGTCTTTTTGAGGTTCGTACCTCACTTAAGTCACACTGTTCA
++
+>-GG2GDGGDFFGF>;GG;GGGGDEGGFEGGFAG=8GGGF;AGFG;FBGD?:DB?+6D69FB2GB
+@test_sample1-1-1467/1
+AAGAAATATAGCAGTTACAATTTAAAATTTTTAAAAAACGTCTTTTTGAG
++
+GGFGGEDGGFG.GFGGFG4GGG8FGGGB@GB<9GFGGGGGGGGGG;69:G
+@test_sample1-1-1469/1
+GAGGACTTTGGTTAAAAAAATTGTTAAGAATTTCAAGGCAGAAACAGCAG
++
+FGGFC5B6FGG=DFDFF:GGE9?FEFGGDGFF>;FGGFFBDEG=?FD<6F
+@test_sample1-1-1471/1
+CCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAATGGTTACAC
++
+G6GGGCGGGGG>5DGF8GGC?5DGFBGDFCBGGCDEEF
+@test_sample1-1-1477/1
+GAATTAGAAGTTAATACAACTGGGCAAACCGCTCGGGTCCCCTTCCACAC
++
+GGFGGFGGFE5>;GGGFBEGF?FFGFGGFDEFGGFGAGGFEGFGFGD07@
+@test_sample1-1-1479/1
+TCACAAATTCAGTTTACATTAGCCCTGCAATCATGTAGACATCCTGATTC
++
+GGGG8FAFECEGBCGG8GFGG5DFFGFBGGGDGFFGGDGG?G>FF33E@2
+@test_sample1-1-1481/1
+TGAACAAGATCCCACACTTAAAAGGATCCTACACTTTTTTTAATGCTCTG
++
+DGGGGGGAFEEG1GFGGGGGGGFGFGGCFBFG>GGFFCGC8GFBF/GGCF
+@test_sample1-1-1483/1
+TAATACACTATATTGTAAATGTTAATATGAGGTAATATATGTGTTAATTA
++
+FGGGGGEBCFGGDDGGFFFGG=GG=G7GGEFGG@GGADG$FEFGDGBFA;
+@test_sample1-1-1485/1
+CACAGTGTGGAAGGGGACCCGAGCGGTTTGCCCAGTTGTATTAACTTCTA
++
+GGGFFGGGFGEGGDGGE5GGGFFFFGEFEBF;F0GG?BC@>GED=EFGFG
+@test_sample1-1-1487/1
+AGAGTGAAAGAACAAAGCTTCCACAGTGTGGAAGGGGACCCGAGCGGTTT
++
+GFGG5:>FF:4FCGFGGGGDGGG.G9GGGGGGGFGFGFGFE?CBCF3??3
+@test_sample1-1-1489/1
+AATTAACGTAATTCATGATCTGACATAATTATCTTAGTCATTGCTTTGCA
++
+GGCFFE?GGCGGGGGGFEFGGGFGGGGGEAGGFCGG*FGFDEFGGGGG6G
+@test_sample1-1-1491/1
+AAAACATCACATTACACACCATAAAGATATATAACTTGTTATCAAAAAGA
++
+GBGGFG=FGGE2FGFGGFEDEF@GGFGB7G8FG>FGG@C%7GGCGDFF6G
+@test_sample1-1-1493/1
+ACATTTTCAGTGTTGAATGATAAATTTTGGAATAGTTAACAGATGATAAA
++
+GFFFGGGGG>FGGEGFB:7GGGGFGF5GGF@GE;E
+@test_sample1-1-1495/1
+ATCTAAGAAGCATCAATGGGTCACTAACATCTAGTGGTACTAATTAACTC
++
+GGG:GG6BFGGDFGFF>GGG<@GGFGGGEGGFFGA.GEFGGAG=D;GC:F
+@test_sample1-1-1497/1
+TTCTGACCTACCTGTTCTGTTCCAGGGTGTGCAGTAGTGACACTGGGAAA
++
+EGGGGG90/F
+@test_sample1-1-1499/1
+ACAGTGTGGAAGGGGACCCGAGCGGTTTGCCCAGTTGTATTAACTTCTAA
++
+GGGFGFGGFDGFG6;GG:FFGEFGE@GFF@GFE?GGGGGGFG=37FGGFG
+@test_sample1-1-1501/1
+GTTAATAAAACGAAAAAAATTCAATTAGTAGGATTACATTCAGGGGACCC
++
+FFG>GGGECGGGGC53DFFFFGF8GDGFGGG=GEFGG73=GGF=GGE1C3
+@test_sample1-1-1503/1
+ACCTTACCTTCTCAACTTTCATCTGCATCTTTACTCCCAACTATATATGA
++
+EEGGEGD;GGGCG=FGGFGFF=GAFGGFG>F)FGEG<1GFG7:FCG)-GG
+@test_sample1-1-1505/1
+TAGTTATGACAGACACTCTAGGGCCTGTAGAGCCTATAATATTTACTTTT
++
+;G8GGGGGGFGFGF8GG%6GGFEGG@@G6@FGFGF=6=G$?GGGD=F9FF
+@test_sample1-1-1507/1
+TGGGAGAAATATTTGCAAACTACTCATCCAACCGGGGATTGATATCCAGA
++
+GGGGFFEGFF8FEGGFEGE8EF2FGGGFEGG3FEFAFFGGGGGGED>5FE
+@test_sample1-1-1509/1
+AGTTAATACAACTCTGAGCACTCAGTACATAAAGTTTCCTTTCCAGTTGC
++
+GGGFFCGGFGGGFGGGFGFEGGGAEA=GEGGEGGGBFDFDGGGFD>4F<>
+@test_sample1-1-1511/1
+GAGTGTCTGTCATAACTACTCAACTCTTATTGTAGCATAAAAACTGTCAA
++
+G=GGFGGGGGGGGFGF?AG>5GGAEFFDE9
+@test_sample1-1-1525/1
+ATAAAAGGGCTCAAAACTGTTTCTCTAGATCTGGAGGTGGTAAATTTGAC
++
+FGFGGGGG?GDFBGGEGGDGGF@B>CF?BGGFFBD09DF;@D:E1B47EF
+@test_sample1-1-1527/1
+ACCATTGCATTAAAGCTCTCTTCCTTGTTTCTAAAAGAATATCTTGACAC
++
+G59GFG?G=GFGGGGGG=GB>GGFCDCGF1EFFGFGGGGECC8F@CGGGG
+@test_sample1-1-1529/1
+CTTTCTAGTAGAATCAGCTCCAAGACAAAAGTATATATACATTTTCTTCC
++
+GFEGEFGGGGGFBGG9@CGGG9G?FBFG:F8C=9
+@test_sample1-1-1533/1
+AAAACCATTTTCCCACATTTGTTTTTACTTTAATGGATTAATTCTTCTGT
++
+FGECG""GCGGGGGG2CG@FGGGFF@GGG6CDFFAGFFBFFGGEE17+5E
+@test_sample1-1-1535/1
+ATTTGTCCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAACAGTTT
++
+6F4DGDGGDGCGGFGFGFEG>DGGFEFAFGGGGGGGEEFGFE@G==:9G
+@test_sample1-1-1541/1
+TATCATCCTCTCTGATAGTTGCTATCTTTCATCTTTTTGATAATAGCCAT
++
+9=5:CG>GGDGGGA8BF
+@test_sample1-1-1543/1
+TAGGTTGATTCTATATCTTGGCCATTGTGAACATGCTGCAATAACATAGG
++
+CGG?FEGF+GGG<=@GGGGBGG/EFE&FFEB13G@BAFGFGEGGB8GF%5
+@test_sample1-1-1545/1
+CTCCATGTCCCTTATATTTAGTCAGAACTCGGTATGTTTTAAGGAAAGAA
++
+GFF;GGGGGGGGG6GGGFGFGGGFFFGGGFCFGDF@$FFGG>=GGDGGGEGGDGFGFG?GFGGFF?>GGED3DFG
+@test_sample1-1-1553/1
+CTTTTTATCAAAGAAATGTAAATTTTAAAATATAGCATTGCTATTGTGTT
++
+GD9EFGGF7DFFGFAFFEFDFEGFGGGGFGGGCGG7BEFGGGGGF<-FG<
+@test_sample1-1-1555/1
+CTGGGCAAACCGCTCGGGTCCCCTTCCACACTGTGGAAGCTTTGTTCTTT
++
+G6FFFGF+BCGGGGF%GGFDG@FGGFGFGGDG4:GGAEAFDFGBFE@E=6
+@test_sample1-1-1557/1
+TTGAATGGGAAGAAATCAAGAATAGTTATACGAATATCACCATTGCATTA
++
+GGGBFGGFG9GFGGGGF9DGFGAGFGGGFGGC9FGGGEGFFGF;FGCF?F
+@test_sample1-1-1559/1
+ATAAGCATATAAAGACTATGGACAAAACAAGAAATCCACATAAAAAATAA
++
+E4GBECGF4G:;GCGEFFFGGGGGEEDGFGEGGEBCGEGFGFF?BGDGG7CEG
+@test_sample1-1-1565/1
+AAATGAGTGCATGAATATTTTGAACAGTGTGACTTAAGTGAGGTACGAAC
++
+GFGGFGF>FCBE8GFC?GFDDGF>;43FFEEA
+@test_sample1-1-1567/1
+AATGAGTGCATGAATATTTTGAACAGTGTGACTTAAGTGAGGTACGAACC
++
+GEGGGDGGGGG>9BFGFGFF@GFGGFF8GGGA4F8+?FGFEFCDFABGGG
+@test_sample1-1-1569/1
+AAGACTATGGACAAAACAAGAAATCCACATAAAAAATAAACATATGAAGA
++
+DGFEGFGGGEBEG8G7AGG9C9FDGGGGGGCFGFGFFGDDG,//,6FGGG
+@test_sample1-1-1571/1
+TGGATAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAG
++
+FCEGEEGGEGBGGGGGGG?GABGGFG?D?4DDFGFGGFEFFFEGFG8DFF
+@test_sample1-1-1573/1
+TGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACAGTAGGCAATTCT
++
+FGDFFEEFFFG1GFDGGEBG3BGGDDFGGFGFGGFDG6FC9G/%)=GG/G
+@test_sample1-1-1575/1
+TATAGTGGCCCGTACCCTGTACAGTAACACATGAATGAGAGAAATGAGTG
++
+FGGGFFFEGFDGFGGFDF@FFGGGEGFGFFGGGGGGGDCGGFFC>>FG:*
+@test_sample1-1-1577/1
+CCCTGCCAGATAACCCTAGTAACCATCATACTATTCTCTGCTTCTATGAG
++
+GGFGD<;GGGGGGCGEGFFEG8GGGGFGEECFGGFFEGFGGDGE:FGADG
+@test_sample1-1-1579/1
+TAAAAATCGGCAGAGGACAGGAATAGACATTTCTCAGGAGACAACATACA
++
+GGAFGFCGGGGGGBFGGGGGG@GGGGE6G7GFGGEADC
+@test_sample1-1-1581/1
+CAGTGTGGAAGGGGACCCGAGCGGTTTGCCCAGTTGTATTAACTTCTAAT
++
+GEFFGFG@DGGGGGGGGGGFFG>FGEGGAFCF129B@GG2?F@EFFGFBG
+@test_sample1-1-1583/1
+GCTATATTTCTTTTTGATAACAAGTTATATATCTTTATGGTGTGTAATGT
++
+GGGFGEGEFG5FGGGGGFDFGG>GEDGBEFGFGG80:@1@BGGFDFGGF%
+@test_sample1-1-1585/1
+ATTAACTGAGTTGAGGGAAACTAAGACGTCATCCACCATTGTTGAAGATG
++
+GGFEEF?GGAGGD9GBG1GEFFC9FGCBGDDGFFFG?BGE7EE0:9GFF8
+@test_sample1-1-1587/1
+GAATGGATAAATGAAATGTGATATGGAAAATTATTCAGCCTTAAAAACAG
++
+@EEGGFGF;>GGGGGGGEF,FG,7GFGDGGGGFGGA?E)G,EFEDGGGFE
+@test_sample1-1-1589/1
+CATAGGAGTGCAGATATCCCTTTGACATATTGATTTCAATTCCTTTGGCT
++
+CFCFE8GGGGGFE/GDGBGG7FFGGFFGGEG?GCDGFFD?DGFDAGF0CF
+@test_sample1-1-1591/1
+GATCTGACATAATTATCTTAGTCATTGCTTTGCAGAGTATGAGTGTCTGT
++
+GGEGGGCGFGGGFFGGGGDGGDGG3ECGEDGGA@?@G2G/FGGFF=;<*@
+@test_sample1-1-1593/1
+GTTAATATGAGGTAATATATGTGTTAATTAACTTGATTTATTCATTCAAC
++
+GGGGC@GFCGGGGGEGGGGFC@GAGG?GGFFDEF$:3;F?EFFE/17EEF
+@test_sample1-1-1595/1
+ATAGTTGGGAGTAAAGATGCAGATGAAAGTTGAGAAGGTAAGGTTCTTGA
++
+F@FGG@;FGGGE?GEDAFGGDFGGFDGDDCCFFGFGGFEGFFGED=7G6E
+@test_sample1-1-1597/1
+ATCTGTGGCCCTTTGTATGTTGTCTCCTGAGAAATGTCTATTCCTGTCCT
++
+GEGGGCFGGGEEFGF$7.GDGGFEGDBFGFGGGGFBCEGGEG3E%1>FDF
+@test_sample1-1-1599/1
+CCAAAGTTTAGCAATGTCAAGGCAATCTCTCTCTTCTTACATTTCCCTTC
++
+GD?GGGGG88G)DBG+GCG3EBGCGGG,8GGFGFGFGFFD1GGGFGFF:F
+@test_sample1-1-1601/1
+TAAGAAGAGAGAGATTGCCTTGACATTGCTAAACTTTGGGAGCGTTTTTC
++
+B0GGFCFGECGCAE;G>GFG@7@DG?FAGGGGG9=FFGGFGGGB5F?GFEF
+@test_sample1-1-1605/1
+GTTGAAGATGCCCTCATTCATGGTGTGGGGGTCAGTGAGCTTAATGTGTG
++
+GFGFGG>EGEEB?GEGG@EF=G?DEBCGGCGGGGGGFGG/E3'G:7FECF
+@test_sample1-1-1607/1
+GAATGAATGTATGTCTTCGTGTAACCATTCTTTCCTTAAAACATACCGAG
++
+GDGC6FGGGCGGFEGGGGGFG6F@CFGBGGGGCFEGGFAGFGF+3GD;FF
+@test_sample1-1-1613/1
+CAGCAAGATTTATTGTGAAGAGTGAAAGAACAAAGCTTCCACAGTGTGGA
++
+GDGGGAGE8GFFAGGEGCGGGEF7GG;GFGG-FAGGC@
+@test_sample1-1-1615/1
+TATCTTCATCATCTTCATATATAGTTGGGAGTAAAGATGCAGATGAAAGT
++
+GEF9>GGFGGGGGEG?BGGD6FGEDGGGG?;;9(FGGGEEGGCG/FFGGG
+@test_sample1-1-1617/1
+CAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGTCAAGGC
++
+EGGEF1EG3GGGGCFFEGGGEGFGFGGBFEGC??GE8EFB:FFG6>=GGEGCFDFGGGGGGGDGGGGGFFEFGGEF/5C4F?AD
+@test_sample1-1-1625/1
+CCATATCACATTTCATTTATCCATTCATCTGTCTATGAACAGTTAGGTTG
++
+GGFGG:=FBBGFGGGDGDBFFFGFDFGFF?GG;@EGD.C>CFCGFF
+@test_sample1-1-1629/1
+TTGGATCATGTGATAGTTCTATTTTTAATTTGTACATGATGCAAGGGTTC
++
+FGGGFGGG;F;6GB@,4BB4GFGGGG?GFEG>GGG;+ABFCDGGE7GEFG
+@test_sample1-1-1631/1
+AATCAATATGTCAAAGGGATATCTGCACTCCTATGTTATTGCAGCATGTT
++
+;D:GGG8:GDCGAGGFG=EFGCDBGGFGFGE4EGGFF?F@GGGGG9G=CE
+@test_sample1-1-1633/1
+AAATCAAAATGATCAGTTTCCCAGTGTCACTACTGCACACCCTGGAACAG
++
+GGE/GGGGGG1GGGGG<)E9GFGGGCGGFFFGFCGEBFCFGEGGDFDCF;
+@test_sample1-1-1635/1
+TCTTTCATCTTTTTGATAATAGCCATTCTAACAGGTTTGAGGGGACATCT
++
+GGGG
+@test_sample1-1-1637/1
+TAGGTTGATTCTATATCTTGGCCATTGTGAACATGCTGCAATAACATAGG
++
+GGG>GFFDGGGDGGG=GGFA9(=:G%FFEG8GGFGFGFF3GEE;DF;FGD
+@test_sample1-1-1639/1
+GTGTCTGGAGGCAGGGTTTACAGGACTTCAAGAACCTTACCTTCTCAACT
++
+EG>EFDFGEGGGEGG;)GFGF3GG?F=GFDFGF2EAAD@A/
+@test_sample1-1-1641/1
+TTATCCTAGATCAAGGAAGTTTGGCAGAGGAAGAGGGTATGGTAGAAGCA
++
+?AD;DGGBG;DGFGGFF?GGGGGGFFFA=3FFGGGGEAGGGG6BGGGGEGBC;EGGFGFGGGGF@DGGGGGFGF8G@CFC=FGF?FC
+@test_sample1-1-1653/1
+ACTGGGCAAACCGCTCGGGTCCCCTTCCACACTGTGGAAGCTTTGTTCTT
++
+GFFFCFGFFGGEGFGCGEFGDFFGFGGGGGGGD6+GDGFFGEGFDF@FFG
+@test_sample1-1-1655/1
+CATTTGTTTCTGTATTGTCTGTTGACAGTTTTTATGCTACAATAAGAGTT
++
+GFGFGCGFG<'59EGF9GGG>5EEFGGGBE?C:GGGGGF1FEG@E-8CEG
+@test_sample1-1-1657/1
+AGCTTATTTCACTTAGCTTAATGTCCTCTAGGTTCATCCACGTTGTCTCA
++
+>DGGGFFAGAD>CBGDGGFFGGGGFEBFG7FGGEGFEDFFGG6=FFFG=7
+@test_sample1-1-1659/1
+CAGAAACAAATGAGTGTGACTGGGTTCCAGTGAAACTTTATTTACAAAAG
++
+,G@2@FFG:F?GGGEG>G:*7GF=FFFGEGGDEFGE>?BGFGFGGBG@6GFGEGGAFGG>8E0.?
+@test_sample1-1-1665/1
+CATACAAAGGGCCACAGATACATCAAAAAATGCTCAACATCACTAATTGT
++
+@;GGBGGGFGGFGFGG/9DGGFEE:@F:GF6GF4GAGGFG,@DGGGGFC>
+@test_sample1-1-1667/1
+TTGCTAATACACTATATTGTAAATGTTAATATGAGGTAATATATGTGTTA
++
+G7>GGFGGCFFG@G6FEEGGGFE8GBG7?BGGFE?@07FFGG&BFGGGGF
+@test_sample1-1-1669/1
+TATTTTTTCTCTTGAGAAAATTAATATGACTCATAGATCTGGTTCCCAAG
++
+FGGEGAGGFGEFGGGEFEGGDF=GDFGGG@GGG81DF&FG8GGFGGEEGFG
+@test_sample1-1-1685/1
+ATAAATATACATATGTATCAAAACATCACATTACACACCATAAAGATATA
++
+BG@GG>GGGG&GGGFF.C>EGBGGGCEGF$GEGGG??B0>5-
+@test_sample1-1-1687/1
+ATAGAGTGAAAAAGATAACCTCTTGAATGGGAGAAATATTTGGAAACTAC
++
+GGFGG1FEGDFF'<
+@test_sample1-1-1693/1
+ATTTTACTTTTGACATATTTGTGTTACTTGTATATTCTGGATATCAATCC
++
+EGF@GG3AFG?GGGG@GGFAGGGGGGDF=GGGGGGGAGGGGEGGGGFF7FFB-5GGEDED@FGD7*EE
+@test_sample1-1-1699/1
+ATATTATAGGCTCTACAGGCCCTAGAGTGTCTGTCATAACTACTCAACTC
++
+GGGFFGBGFGG);GGF+GGFFGFGFFGGEGGFFABGFEE%GDFE=FBFCC
+@test_sample1-1-1701/1
+TGACATTGCTAAACTTTGGGAGCGTTTTTCTGACCTACCTGTTCTGTTCC
++
+G.CFG8GGDGEAFGGGFGF@FCDGG8@FGFGGFGEG>@FGG8%1FF>G@)
+@test_sample1-1-1703/1
+TATGTGTATGTTGTTGAATGAATAAATCAAGTTAATTAACACATATATTA
++
+GGDGGFGFFFGCGF*-GGGCGCDDGGFFCEGFDFF0GFB%,E3EGGFG>2
+@test_sample1-1-1705/1
+TCGTATAACTATTCTTGATTTCTTCCCATTCAACACTTTTTTTTTTACTT
++
+EFF?GGGGGGBG5GG:FCB6?>FFG@DG8F?GGFFDGFD=BG>FEG>DEE
+@test_sample1-1-1707/1
+TGATAAATTTTGGAATAGTTAACAGATGATAAAAGTGTTGTTTTCAGTCA
++
+GGGCGGGF@GFGGD:GGGBC1GGG@G-EG6EGGFF2FB.GF4G1E
+@test_sample1-1-1709/1
+TTACCACCTCCAGATCTAGAGAAACAGTTTTGAGCCCTTTTATTTTGCTC
++
+GGGGFF:GGEFGGFGG;GGGGCGGGGGGFGGEGG
+@test_sample1-1-1711/1
+GGCCCTTTGTATGTTGTCTCCTGAGAAATGTCTATTCCTGTCCTCTGCCG
++
+G=GG@E@DDGGGE94CGFFEFEDGFG=FFEAGGGFGFGECE5-EAFCD7<
+@test_sample1-1-1713/1
+AAAAAATCTGAACTCATAGAAGCAGAGAATAGTATGATGGTTACTAGGGT
++
+G3GEGGG;FG?GDGGDGAGGG(GGEEBGG=G>FCGCGGEFDFFFF?9$;G
+@test_sample1-1-1715/1
+GTTCAGATTTTTTAGATGCTAAGTGAGATCATGCAATATTTGTCTTTCTG
++
+>FCGFDFGCGFGGEGG?FAGGGGGCFGBGDCGEFGGDEGG5CFGFFFFGG
+@test_sample1-1-1717/1
+GAGGACAGGAATAGACATTTCTCAGGAGACAACATACAAAGGGCCACAGA
++
+GCG9F
+@test_sample1-1-1719/1
+ATCAATGGAGGCCTGGTTACAAGGATCTAAGAAGCATCAATGGGTCACTA
++
+GEFGG*GFGGCGGGDGGGGCGFBE7FFGFF?GG=EEGFGF3GGAGEBCGG
+@test_sample1-1-1721/1
+TCCTTGTAACCAGGCCTCCATTGATTTCTCTTGGGAACCAGATCTATGAG
++
+GG7EGA>GGGGGFGEGC?FD@<6GG6GGGECAAFEFGGGGGDFGFFGGGGGGGFFEG8'%GFFEGFDF6GFF=DE
+@test_sample1-1-1725/1
+AAGCAGAAATCAAAATGATCAGTTTCCCAGTGTCACTACTGCACACCCTG
++
+)GGGGGGDGFFGGFGGFGGFEGEGGFCFEB@GGE?14GGED;EGGA8G>0
+@test_sample1-1-1727/1
+GTCCCATGAGTCAAATTTACCACCTCCAGATCTAGAGAAACAGTTTTGAG
++
+GFGGFCGGGGF>GGFFFGGGGGGD7'GFFGD6;;GGGG@#E*;E>EGBGF
+@test_sample1-1-1729/1
+CATACTATTCTCTGCTTCTATGAGTTCAGATTTTTTAGATGCTAAGTGAG
++
+>FGFCGBBFGGE>FEGGDGCGGF?GGFGGGG5FGACEFGFG;EEBC8GGD
+@test_sample1-1-1731/1
+TAAACCCTGCCTCCAGACACATTGTCTGGAATCAGGATGTCTACATGATT
++
+GGGGEGG;GFDGGGE=GFBFFFCFGFDEGEFGGF?GGFDE:FG(CD%GGG
+@test_sample1-1-1733/1
+ACTCAGTACATAAAGTTTCCTTTCCAGTTGCTGAAGGAATTTATAGTGGC
++
+GGFGFGFFA6GBGE@GGGEGGFE@GGAFGGEGG0GDDGFG:C=@6B@
+@test_sample1-1-1741/1
+TATCAGAAGAGTTACTTGGTGATGGAAAGTAAGGAAAAAAAGAAAAATGC
++
+G-GG,GG###FG=EGGG@A97BG?F
+@test_sample1-1-1743/1
+TTTTTAATTTGTACATGATGCAAGGGTTCCCTTTTCTATCATCCTCTCTG
++
+BGGFFF7FG;<+:GGCFFGGGFFCFGGGCFGCEG2A1AFGFDFCCA5FGG
+@test_sample1-1-1745/1
+TATGGATGGTGCTTCTACCATACCCTCTTCCTCTGCCAAACTTCCTTGAT
++
+G3GGGGGGGG@G=CGGDGFFGGDEGFGGFG>GGEF@GEGG@%DBBF.EGE
+@test_sample1-1-1747/1
+TTTTTTAGATGCTAAGTGAGATCATGCAATATTTGTCTTTCTGTGTCTAG
++
+G9EDG8G?7G=F6FEDGGBGGGGFGGGGF3GEGFF=DGCFGGGGBC3DG>
+@test_sample1-1-1749/1
+CTGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTAGCAATGT
++
+BGGFEGGFFF@GGG9:GGGEFGDGG:EGGGF4GGGGF:GG?76FGA@@GG
+@test_sample1-1-1751/1
+ACATTTTCAGTGTTGAATGATAAATTTTGGAATAGTTAACAGATGATAAA
++
+GFF:GGEF=GGDGEGGG=G9FGFGDGEGGFGFGGGGEGG@6FFFFDG@GGGF0FEAF7:CDG/8G
+@test_sample1-1-1757/1
+GCCAAGATATAGAATCAACCTAACTGTTCATAGACAGATGAATGGATAAA
++
+GGGEGGGGDGGGGEGG2GGGFFGGF>DGFGF?5GGGGGGG3EG1*GGGC9
+@test_sample1-1-1759/1
+AGCCTGTGAGGAAACAGACACTCATACTCTGCAAAGCAATGACTAAGATA
++
+GG@GFGGGFGFFFGGGFGFFFGC8G8?FFGECE*7EGC>;G99FFGF0FF
+@test_sample1-1-1761/1
+TACCATGAGCTTACTTAGATTAACTGAGTTGAGGGAAACTAAGACGTCAT
++
+GFGGFFGFFFGGGGGGGGGE/&ECGGFBGGFGGGGEA3;9FGG+1ECD:G
+@test_sample1-1-1763/1
+ATGAGCTTACTTAGATTAACTGAGTTGAGGGAAACTAAGACGTCATCCAC
++
+F=GFDGGGGG=?F6FEGGGFDDEGCGDFEFGCFGA9FFGG6BGE-G;7G9DC5DFGGGGC9G5=BAGF
+@test_sample1-1-1767/1
+GGATCTAAGAAGCATCAATGGGTCACTAACATCTAGTGGTACTAATTAAC
++
+GGFGGGGGFGGDBG8AFGGFCEGDGCGG;*7>?3G<9EFGC3ECFGFG?/
+@test_sample1-1-1769/1
+TGACTGAAAACAACACTTTTATCATCTGTTAACTATTCCAAAATTTATCA
++
+GGGCGFEGGGGE?GAFGDFGGGFEG@BGGG=FFEGEG,)GGG9<;AFE?9
+@test_sample1-1-1771/1
+AATCATGTAGACATCCTGATTCCAGACAATGTGTCTGGAGGCAGGGTTTA
++
+GGFFFGGGA?GG:FGDFFGGBFGGGFBEDFCGGGCFGGCFGCFC6E8DFG
+@test_sample1-1-1773/1
+TGATCCAAGAATCCTACTTCTGGGTATATAGCCAAAGGAATTGAAATCAA
++
+G*EGGGGGGFGFGFGF7EGG4BFFGGGGGGEFGGFGGGFGGFGFE71?<=
+@test_sample1-1-1775/1
+TCTTCCTTGTTTCTAAAAGAATATCTTGACACACATTAAGCTCACTGACC
++
+FFEGGGGG1GCGG4GFF@GGAG/GG1FGEF0%:GEFGGAFBFDFGGF8BGG7.GE=BFBGBF
+@test_sample1-1-1781/1
+TGAAGCTGATGTTGTCTGCAATAATGCTAAGAATCTTAAAGTGTTGAATT
++
+GEGGBGGGGGBGGEGGGFGGEFFEGGFFE?GGEGGCE-A;CGEG38FG5G
+@test_sample1-1-1783/1
+TTTCTTCCCAATGATTAACAGAGTTAATTAGTACCACTAGATGTTAGTGA
++
+GFGGEEFGGGFFGEGGGFGGFEGGGGFGFDGFE8GECGEA?DDGGGGGE?
+@test_sample1-1-1785/1
+ACAGTGTGATGTTTTAATATATGTGTATGTTGTTGAATGAATAAATCAAG
++
+5FGGGEFEFFGFGGGGF=GFFG3GGGGDG@F?8FGFFGF:D:GEGGFEE@
+@test_sample1-1-1787/1
+TAAAAATTATTTTTTAATTGACAAAAATTATATATATTTATGGAATACAG
++
+G>FCGGGGGCGG=EFFEGDGGGFE'GGG34GGBA4FDG0AEDG;9GGFGEGGGGCGGFFFGGG=GF>GEA%
+@test_sample1-1-1791/1
+TCATTTTGATTTCTGCTTTCTAGTAGAATCAGCTCCAAGACAAAAGTATA
++
+(GGGG>GGGGFFDGFGEDGG+EGCFGEFFFFGGDFG;DFGGF@BBGF2
+@test_sample1-1-1797/1
+ATCTGGTTCCCAAGAGAAATCAATGGAGGCCTGGTTACAAGGATCTAAGA
++
+GG@EGAFGGGGGGGF*GF;D78GGGGAGFGEFEDG&AA2GGF3),
+@test_sample1-1-1799/1
+ACAGAATTGCCTACTGTTTTTAAGGCTGAATAATTTTCCATATCACATTT
++
+D?=AG7GGGGG<@GFGGGA.GFG0FG;@EGFG>FE758FGCCGC?E97*6
+@test_sample1-1-1801/1
+GCACACCCTGGAACAGAACAGGTAGGTCAGAAAAACGCTCCCAAAGTTTA
++
+GF@GG:F*BG=GFG8FEGGE9FCFFFBCG4GGGEGFF3>GG%GFFDC7FE
+@test_sample1-1-1803/1
+GATCTGACATAATTATCTTAGTCATTGCTTTGCAGAGTATGAGTGTCTGT
++
+G5EFGGGGGFFGGGG?DG;@GGGGG@CFGBGGG6&CEGGF=CGFGDFD99
+@test_sample1-1-1805/1
+CACTTAGCTTAATGTCCTCTAGGTTCATCCACGTTGTCTCAAATGACAGA
++
+F;GCGGEFGGGAG:EGGFGDDGFEFBFFGDEGDFGE76CBGBF7@FEF;4
+@test_sample1-1-1807/1
+GTTGAAGGGAAATGTAAGAAGAGAGAGATTGCCTTGACATTGCTAAACTT
++
+AFGG@CGFEGGGGD@AGEFDGDG@G:DCBDEFGGGGEEFAFEF;>FBC;F
+@test_sample1-1-1809/1
+TTTGAATATTCTTCATATGTTTATTTTTTATGTGGATTTCTTGTTTTGTC
++
+FFGGG:=;G9FCGGFGGEFGGFGF8GGGE:GGF;GGEFEGADEG;G?FGGAF@3DAGEE
+@test_sample1-1-1817/1
+GATGTTGTCTGCAATAATGCTAAGAATCTTAAAGTGTTGAATTAGAAGTT
++
+GEGG2FG@D=CGGEGFGGFGFFGGG,GGGECFFDEF4GGFDGEDGGEGBE
+@test_sample1-1-1819/1
+CTGCTGCTCAAAAAAAAAAAAAAGTTAATACAACTCTGAGCACTCAGTAC
++
+GBFF74>EGGFGF<53DD=<
+@test_sample1-1-1823/1
+AATGGATTAATTCTTCTGTATAATTTTTTTAATCTGAAAATGTCAGCTTT
++
+GFFGGBCDEGFGAG6>FFG>GAF0FFGGG9F<GBGGCGGGFGGFF;F>FGAD?DGF')FF:E>AGGFFBFGE@FGFGGFG5FFFG3(GGGGFGDF891G:D.FE
+@test_sample1-1-1835/1
+CTAAACTTTGGGAGCGTTTTTCTGACCTACCTGTTCTGTTCCAGGGTGTG
++
+GFDGEGG$GDFFGFGGGGGGGGGGGFGDGBFGFDGFBGGCFGEECF3F$C
+@test_sample1-1-1837/1
+TAACACAAATATGTCAAAAGTAAAATAAATAAATAAATAAATAAATAAAT
++
+GDFEFGGEGGFGGD>FGF<;GFEGCF>>@GGD9GGF/6FAE@GGGFCG??
+@test_sample1-1-1839/1
+CAAATAGTGATGTTGAGCATTTTTTGATGTATCTGTGGCCCTTTGTATGT
++
+G?9GDCB?EDFG;C@4GFFFEBFGGGGG7GFFGCBDGFGF=FFD(GGGGFGCFFG?DGGG5@GGGG8>48=E
+@test_sample1-1-1855/1
+TACAGAAACAAATGAGTGTGACTGGGTTCCAGTGAAACTTTATTTACAAA
++
+4EGG@GGBGGGF;@GGF==>G1DAGGFGG=EGFFGFFFCFDEG?EGEGGAFEGDGGGFE>FGGGFFFAFE=GGGFGGC+?DGDFE
+@test_sample1-1-1861/1
+GATGTCCCCTCAAACCTGTTAGAATGGCTATTATCAAAAAGATGAAAGAT
++
+EGGGFGG>8BDGD