From 545c79befe9ab1519fe2e1a647e8ce0c10f8b99a Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Mon, 11 Dec 2023 10:27:28 +0100 Subject: [PATCH 01/11] update WFA2_lib to version 2.3.4 --- _custom_build.py | 2 +- pywfa/WFA2_lib/.gitignore | 43 + pywfa/WFA2_lib/CMakeLists.txt | 226 +++ pywfa/WFA2_lib/Makefile | 45 +- pywfa/WFA2_lib/README.md | 196 ++- pywfa/WFA2_lib/VERSION | 1 - pywfa/WFA2_lib/VERSION.txt | 1 + pywfa/WFA2_lib/alignment/affine2p_penalties.h | 2 - pywfa/WFA2_lib/alignment/affine_penalties.h | 2 - pywfa/WFA2_lib/alignment/cigar.c | 635 +++++++-- pywfa/WFA2_lib/alignment/cigar.h | 103 +- pywfa/WFA2_lib/alignment/score_matrix.c | 5 +- pywfa/WFA2_lib/alignment/score_matrix.h | 1 - pywfa/WFA2_lib/system/mm_allocator.c | 1 + pywfa/WFA2_lib/system/mm_allocator.h | 1 + pywfa/WFA2_lib/system/mm_stack.c | 1 + pywfa/WFA2_lib/system/profiler_counter.c | 1 + pywfa/WFA2_lib/system/profiler_counter.h | 3 +- pywfa/WFA2_lib/system/profiler_timer.c | 1 + pywfa/WFA2_lib/system/profiler_timer.h | 2 +- pywfa/WFA2_lib/utils/Makefile | 1 - pywfa/WFA2_lib/utils/bitmap.c | 4 +- pywfa/WFA2_lib/utils/bitmap.h | 1 - pywfa/WFA2_lib/utils/commons.h | 7 +- pywfa/WFA2_lib/utils/dna_text.c | 2 +- pywfa/WFA2_lib/utils/dna_text.h | 2 - pywfa/WFA2_lib/utils/heatmap.c | 4 +- pywfa/WFA2_lib/utils/heatmap.h | 2 +- pywfa/WFA2_lib/utils/sequence_buffer.c | 2 +- pywfa/WFA2_lib/utils/sequence_buffer.h | 1 - pywfa/WFA2_lib/utils/string_padded.c | 139 -- pywfa/WFA2_lib/utils/vector.c | 2 +- pywfa/WFA2_lib/utils/vector.h | 3 +- pywfa/WFA2_lib/wavefront/Makefile | 6 + pywfa/WFA2_lib/wavefront/wavefront.c | 18 +- pywfa/WFA2_lib/wavefront/wavefront.h | 11 +- pywfa/WFA2_lib/wavefront/wavefront_align.c | 558 +++----- pywfa/WFA2_lib/wavefront/wavefront_align.h | 30 +- pywfa/WFA2_lib/wavefront/wavefront_aligner.c | 785 +++++++---- pywfa/WFA2_lib/wavefront/wavefront_aligner.h | 179 +-- .../WFA2_lib/wavefront/wavefront_attributes.c | 16 +- .../WFA2_lib/wavefront/wavefront_attributes.h | 36 +- 
.../WFA2_lib/wavefront/wavefront_backtrace.c | 146 +- .../wavefront/wavefront_backtrace_buffer.c | 49 +- .../wavefront/wavefront_backtrace_buffer.h | 17 +- .../wavefront/wavefront_backtrace_offload.c | 4 +- pywfa/WFA2_lib/wavefront/wavefront_bialign.c | 567 +++++--- pywfa/WFA2_lib/wavefront/wavefront_bialign.h | 40 +- .../WFA2_lib/wavefront/wavefront_bialigner.c | 199 +++ .../WFA2_lib/wavefront/wavefront_bialigner.h | 125 ++ .../WFA2_lib/wavefront/wavefront_components.c | 28 +- .../WFA2_lib/wavefront/wavefront_components.h | 7 +- pywfa/WFA2_lib/wavefront/wavefront_compute.c | 375 +++-- pywfa/WFA2_lib/wavefront/wavefront_compute.h | 28 +- .../wavefront/wavefront_compute_affine.c | 86 +- .../wavefront/wavefront_compute_affine.h | 2 +- .../wavefront/wavefront_compute_affine2p.c | 67 +- .../wavefront/wavefront_compute_affine2p.h | 2 +- .../wavefront/wavefront_compute_edit.c | 81 +- .../wavefront/wavefront_compute_linear.c | 82 +- pywfa/WFA2_lib/wavefront/wavefront_debug.c | 221 ++- pywfa/WFA2_lib/wavefront/wavefront_debug.h | 10 +- pywfa/WFA2_lib/wavefront/wavefront_display.c | 2 + pywfa/WFA2_lib/wavefront/wavefront_display.h | 2 - pywfa/WFA2_lib/wavefront/wavefront_extend.c | 501 ++----- pywfa/WFA2_lib/wavefront/wavefront_extend.h | 11 +- .../wavefront/wavefront_extend_kernels.c | 203 +++ .../wavefront/wavefront_extend_kernels.h | 69 + .../wavefront/wavefront_extend_kernels_avx.c | 167 +++ .../wavefront/wavefront_extend_kernels_avx.h | 47 + .../WFA2_lib/wavefront/wavefront_heuristic.c | 567 ++++---- .../WFA2_lib/wavefront/wavefront_heuristic.h | 34 +- pywfa/WFA2_lib/wavefront/wavefront_offset.h | 2 - pywfa/WFA2_lib/wavefront/wavefront_pcigar.c | 36 +- pywfa/WFA2_lib/wavefront/wavefront_pcigar.h | 16 +- .../WFA2_lib/wavefront/wavefront_penalties.c | 170 +-- .../WFA2_lib/wavefront/wavefront_penalties.h | 59 +- pywfa/WFA2_lib/wavefront/wavefront_plot.c | 325 +++-- pywfa/WFA2_lib/wavefront/wavefront_plot.h | 41 +- .../WFA2_lib/wavefront/wavefront_sequences.c | 310 ++++ 
.../WFA2_lib/wavefront/wavefront_sequences.h | 148 ++ pywfa/WFA2_lib/wavefront/wavefront_slab.c | 5 +- pywfa/WFA2_lib/wavefront/wavefront_slab.h | 1 - .../wavefront/wavefront_termination.c | 162 +++ .../wavefront/wavefront_termination.h | 57 + pywfa/WFA2_lib/wavefront/wavefront_unialign.c | 324 +++++ .../wavefront_unialign.h} | 58 +- pywfa/WFA2_lib/wavefront/wfa.h | 216 +++ pywfa/WFA2_lib/wavefront/wfa.hpp | 36 + pywfa/WFA_wrap.pxd | 1248 +++++++++++++---- pywfa/align.pyx | 7 +- 91 files changed, 6796 insertions(+), 3245 deletions(-) create mode 100644 pywfa/WFA2_lib/.gitignore create mode 100644 pywfa/WFA2_lib/CMakeLists.txt delete mode 100644 pywfa/WFA2_lib/VERSION create mode 100644 pywfa/WFA2_lib/VERSION.txt delete mode 100644 pywfa/WFA2_lib/utils/string_padded.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_bialigner.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_bialigner.h create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.h create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.h create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_sequences.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_sequences.h create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_termination.c create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_termination.h create mode 100644 pywfa/WFA2_lib/wavefront/wavefront_unialign.c rename pywfa/WFA2_lib/{utils/string_padded.h => wavefront/wavefront_unialign.h} (55%) create mode 100644 pywfa/WFA2_lib/wavefront/wfa.h create mode 100644 pywfa/WFA2_lib/wavefront/wfa.hpp diff --git a/_custom_build.py b/_custom_build.py index 7eaf0a6..e7deb16 100644 --- a/_custom_build.py +++ b/_custom_build.py @@ -75,7 +75,7 @@ def get_extra_args(flags): # this has happened on multiple OS with/without `libomp-dev` # compiler = ccompiler.new_compiler() omp = 0 
#1 if has_flag(compiler, "-fopenmp") else 0 -ret = run(f"cd pywfa/WFA2_lib; make clean all BUILD_WFA_PARALLEL={omp} BUILD_MINIMAL=1", +ret = run(f"cd pywfa/WFA2_lib; make clean all BUILD_WFA_PARALLEL={omp} BUILD_TOOLS=0 BUILD_EXAMPLES=0", shell=True) if ret.returncode != 0: print("Unable to build WFA2_lib") diff --git a/pywfa/WFA2_lib/.gitignore b/pywfa/WFA2_lib/.gitignore new file mode 100644 index 0000000..6148fb8 --- /dev/null +++ b/pywfa/WFA2_lib/.gitignore @@ -0,0 +1,43 @@ +lib/ +bin/ +build/ + +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Test output files +tests/wfa.utest.log.correct +tests/wfa.utest.log.mem +tests/wfa.utest.log.time + + diff --git a/pywfa/WFA2_lib/CMakeLists.txt b/pywfa/WFA2_lib/CMakeLists.txt new file mode 100644 index 0000000..acb3d90 --- /dev/null +++ b/pywfa/WFA2_lib/CMakeLists.txt @@ -0,0 +1,226 @@ +# For Debian currently with +# +# cd build +# cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. 
+# make +# make test +# make install +# See below option statements and the README for build information + +cmake_minimum_required(VERSION 3.16) +project(wfa2lib) + +set(CMAKE_CXX_STANDARD 17) + +include(FeatureSummary) +include(GNUInstallDirs) + +find_package(PkgConfig REQUIRED) + +feature_summary( + FATAL_ON_MISSING_REQUIRED_PACKAGES + WHAT REQUIRED_PACKAGES_NOT_FOUND) + +# ---- Options + +option(OPENMP "Enable OpenMP" OFF) # enables WFA_PARALLEL +option(PROFILING "Enable profiling" OFF) +option(ASAN "Use address sanitiser" OFF) +option(EXTRA_FLAGS "Add optimization flags for C/C++ compiler" OFF) + +# include(CheckIPOSupported) # adds lto +# check_ipo_supported(RESULT ipo_supported OUTPUT output) + +# ---- Dependencies + +if(OPENMP) + include(FindOpenMP) + set(OPTIMIZE_FLAGS "-DWFA_PARALLEL") +endif(OPENMP) + +if(EXTRA_FLAGS) + set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} ${EXTRA_FLAGS}") +endif(EXTRA_FLAGS) + +find_package(Threads) +set_package_properties(Threads PROPERTIES TYPE REQUIRED) + +# ---- Build switches +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +# set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ${ipo_supported}) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING + "Choose the type of build, options are: Release|Debug|RelWithDebInfo (for distros)." 
FORCE) +endif() + +if (${CMAKE_BUILD_TYPE} MATCHES Release) + set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} -march=native -D_FILE_OFFSET_BITS=64") +endif() + +if ((${CMAKE_BUILD_TYPE} MATCHES Release) OR (${CMAKE_BUILD_TYPE} MATCHES RelWithDebInfo)) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPTIMIZE_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTIMIZE_FLAGS}") +endif () + +if (${CMAKE_BUILD_TYPE} MATCHES "Debug") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPTIMIZE_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTIMIZE_FLAGS}") + add_definitions(-Wfatal-errors) +endif () + +if (ASAN) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common") +endif(ASAN) + +if(PROFILING) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") +endif(PROFILING) + +if(GPROF) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") +endif(GPROF) + +# ---- Include files + +file(GLOB INCLUDES + wavefront/*.h* + ) +file(GLOB UTILS_INCLUDES + utils/*.h* + ) +file(GLOB ALIGNMENT_INCLUDES + alignment/*.h* + ) +file(GLOB SYSTEM_INCLUDES + system/*.h* + ) + +set(wfa2lib_SOURCE + wavefront/wavefront_align.c + wavefront/wavefront_aligner.c + wavefront/wavefront_attributes.c + wavefront/wavefront_backtrace_buffer.c + wavefront/wavefront_backtrace.c + wavefront/wavefront_backtrace_offload.c + wavefront/wavefront_bialign.c + wavefront/wavefront_bialigner.c + wavefront/wavefront.c + wavefront/wavefront_components.c + wavefront/wavefront_compute_affine2p.c + wavefront/wavefront_compute_affine.c + wavefront/wavefront_compute.c + wavefront/wavefront_compute_edit.c + wavefront/wavefront_compute_linear.c + wavefront/wavefront_debug.c + wavefront/wavefront_display.c + wavefront/wavefront_extend.c + wavefront/wavefront_heuristic.c + wavefront/wavefront_pcigar.c + wavefront/wavefront_penalties.c + wavefront/wavefront_plot.c + wavefront/wavefront_sequences.c + wavefront/wavefront_slab.c 
+ wavefront/wavefront_unialign.c + wavefront/wavefront_termination.c + wavefront/wavefront_extend_kernels_avx.c + wavefront/wavefront_extend_kernels.c + system/mm_stack.c + system/mm_allocator.c + system/profiler_counter.c + system/profiler_timer.c + utils/bitmap.c + utils/dna_text.c + utils/sequence_buffer.c + utils/vector.c + utils/commons.c + utils/heatmap.c + alignment/affine2p_penalties.c + alignment/affine_penalties.c + alignment/cigar.c + alignment/score_matrix.c +) + +add_library(wfa2_static + ${wfa2lib_SOURCE} + ) +add_library(wfa2 SHARED ${wfa2lib_SOURCE}) +set_target_properties(wfa2_static PROPERTIES OUTPUT_NAME wfa2) +set_target_properties(wfa2 PROPERTIES SOVERSION 0) +target_include_directories(wfa2 PUBLIC . wavefront utils) +target_include_directories(wfa2_static PUBLIC . wavefront utils) +add_library(wfa2::wfa2 ALIAS wfa2) +add_library(wfa2::wfa2_static ALIAS wfa2_static) + +if(OPENMP) + target_link_libraries(wfa2_static PRIVATE OpenMP::OpenMP_C) + target_link_libraries(wfa2 PRIVATE OpenMP::OpenMP_C) +endif(OPENMP) + +# ---- C++ binding library + +set(wfa2cpp_SOURCE + bindings/cpp/WFAligner.cpp +) +file(GLOB CPP_INCLUDES + bindings/cpp/*.h* + ) +add_library(wfa2cpp_static STATIC ${wfa2cpp_SOURCE}) +add_library(wfa2cpp SHARED ${wfa2cpp_SOURCE}) +set_target_properties(wfa2cpp PROPERTIES SOVERSION 0) +set_target_properties(wfa2cpp_static PROPERTIES OUTPUT_NAME wfa2cpp) +target_link_libraries(wfa2cpp PUBLIC wfa2) +target_link_libraries(wfa2cpp_static PUBLIC wfa2_static) +add_library(wfa2::wfa2cpp ALIAS wfa2cpp) +add_library(wfa2::wfa2cpp_static ALIAS wfa2cpp_static) + +if(OPENMP) + target_link_libraries(wfa2cpp_static PRIVATE OpenMP::OpenMP_CXX) + target_link_libraries(wfa2cpp PRIVATE OpenMP::OpenMP_CXX) +endif(OPENMP) + +# ---- Get version + +file (STRINGS "VERSION.txt" BUILD_NUMBER) +add_definitions(-DWFA2LIB_VERSION="${BUILD_NUMBER}") +add_definitions(-DVERSION="${BUILD_NUMBER}") + +set(wfa2lib_LIBS +) + +# add_dependencies(wfa2lib ${wfa2lib_DEPS}) + 
+# ---- Build all + +# ---- Test + +enable_testing() + + +function(add_wfa_test) + add_test( + NAME wfa2lib + COMMAND ./tests/wfa.utest.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) +endfunction() + +add_wfa_test() + +# ---- Install + +# Do not install anything when used with FetchContent +if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + install(TARGETS wfa2_static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) + install(TARGETS wfa2 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) + + install(FILES ${INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/wavefront) + install(FILES ${UTILS_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/utils) + install(FILES ${ALIGNMENT_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/alignment) + install(FILES ${SYSTEM_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/system) + + install(TARGETS wfa2cpp ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(TARGETS wfa2cpp_static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(FILES ${CPP_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/bindings/cpp) +endif() diff --git a/pywfa/WFA2_lib/Makefile b/pywfa/WFA2_lib/Makefile index 4ee8080..ea25df7 100644 --- a/pywfa/WFA2_lib/Makefile +++ b/pywfa/WFA2_lib/Makefile @@ -5,6 +5,7 @@ FOLDER_BIN=bin FOLDER_BUILD=build FOLDER_BUILD_CPP=build/cpp FOLDER_LIB=lib +FOLDER_TESTS=tests UNAME=$(shell uname) @@ -16,17 +17,14 @@ CC_FLAGS=-Wall -g -fPIC AR=ar AR_FLAGS=-rsc -ifndef BUILD_EXAMPLES -BUILD_EXAMPLES=0 +ifndef BUILD_EXAMPLES +BUILD_EXAMPLES=1 endif -ifndef BUILD_TOOLS +ifndef BUILD_TOOLS BUILD_TOOLS=1 endif ifndef BUILD_WFA_PARALLEL -BUILD_WFA_PARALLEL=1 -endif -ifndef BUILD_MINIMAL -BUILD_MINIMAL=0 +BUILD_WFA_PARALLEL=0 endif ############################################################################### @@ -38,18 +36,12 @@ SUBDIRS=alignment \ system \ utils \ wavefront -ifeq ($(BUILD_MINIMAL),0) - SUBDIRS+=bindings/cpp +ifeq ($(BUILD_TOOLS),1) + APPS+=tools/generate_dataset \ + 
tools/align_benchmark endif - -ifeq ($(BUILD_MINIMAL),0) - ifeq ($(BUILD_TOOLS),1) - APPS+=tools/generate_dataset \ - tools/align_benchmark - endif - ifeq ($(BUILD_EXAMPLES),1) - APPS+=examples - endif +ifeq ($(BUILD_EXAMPLES),1) + APPS+=examples endif all: CC_FLAGS+=-O3 -march=native #-flto -ffat-lto-objects @@ -69,29 +61,26 @@ asan: build # Build rules ############################################################################### build: setup -build: $(SUBDIRS) -build: lib_wfa +build: $(SUBDIRS) +build: lib_wfa build: $(APPS) setup: - $( if ($(BUILD_MINIMAL),0) $(@mkdir -p $(FOLDER_BUILD_CPP))) - @mkdir -p $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_LIB) - + @mkdir -p $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_BUILD_CPP) $(FOLDER_LIB) + lib_wfa: $(SUBDIRS) $(AR) $(AR_FLAGS) $(LIB_WFA) $(FOLDER_BUILD)/*.o 2> /dev/null - $( if ($(BUILD_MINIMAL),0) $(AR) $((AR_FLAGS) $(LIB_WFA_CPP) $(FOLDER_BUILD)/*.o $(FOLDER_BUILD_CPP)/*.o 2> /dev/null)) clean: - rm -rf $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_LIB) - $(MAKE) - + rm -rf $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_LIB) 2> /dev/null + ############################################################################### # Subdir rule ############################################################################### export $(SUBDIRS): $(MAKE) --directory=$@ all - + $(APPS): $(MAKE) --directory=$@ all diff --git a/pywfa/WFA2_lib/README.md b/pywfa/WFA2_lib/README.md index a584a03..b3257fc 100644 --- a/pywfa/WFA2_lib/README.md +++ b/pywfa/WFA2_lib/README.md @@ -12,11 +12,35 @@ The wavefront alignment (WFA) algorithm is an **exact** gap-affine algorithm tha ### 1.2 What is WFA2-lib? -The WFA2 library implements the WFA algorithm for different distance metrics and alignment modes. It supports various [distance functions](#wfa2.distances): indel, edit, gap-lineal, gap-affine, and dual-gap gap-affine distances. The library allows computing only the score or the complete alignment (i.e., CIGAR) (see [Alignment Scope](#wfa2.scope)). 
Also, the WFA2 library supports computing end-to-end alignments (a.k.a. global-alignment) and ends-free alignments (including semi-global, glocal, and extension alignment) (see [Alignment Span](#wfa2.span)). In the case of long and noisy alignments, the library provides different [low-memory modes](#wfa2.mem) that significantly reduce the memory usage of the naive WFA algorithm implementation. Beyond the exact-alignment modes, the WFA2 library implements [heuristic modes](#wfa2.heuristics) that dramatically accelerate the alignment computation. Additionally, the library provides many other support functions to display and verify alignment results, control the overall memory usage, and more. +The WFA2 library implements the WFA algorithm for different distance metrics and alignment modes. It supports various [distance functions](#wfa2.distances): indel, edit, gap-linear, gap-affine, and dual-gap gap-affine distances. The library allows computing only the score or the complete alignment (i.e., CIGAR) (see [Alignment Scope](#wfa2.scope)). Also, the WFA2 library supports computing end-to-end alignments (a.k.a. global-alignment) and ends-free alignments (including semi-global, glocal, and extension alignment) (see [Alignment Span](#wfa2.span)). In the case of long and noisy alignments, the library provides different [low-memory modes](#wfa2.mem) that significantly reduce the memory usage of the naive WFA algorithm implementation. Beyond the exact-alignment modes, the WFA2 library implements [heuristic modes](#wfa2.heuristics) that dramatically accelerate the alignment computation. Additionally, the library provides many other support functions to display and verify alignment results, control the overall memory usage, and more. ### 1.3 Getting started -Git clone and compile the library, tools, and examples. +Git clone and compile the library, tools, and examples (by default, use cmake). 
+ +``` +git clone https://github.com/smarco/WFA2-lib +cd WFA2-lib +mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=Release +cmake --build . --verbose +ctest . --verbose +``` + +There are some flags that can be used. For instance: + +``` +cmake .. -DOPENMP=TRUE +cmake .. -DCMAKE_BUILD_TYPE=Release -DEXTRA_FLAGS="-ftree-vectorizer-verbose=5" +``` + +To build a shared library (static is the default). + +``` +cmake -DBUILD_SHARED_LIBS=ON +``` + +Alternatively, the Makefile build system can be used. ``` $> git clone https://github.com/smarco/WFA2-lib @@ -24,6 +48,8 @@ $> cd WFA2-lib $> make clean all ``` +Also, it is possible to build WFA2-lib in a GNU Guix container, for more information see [guix.scm](./guix.scm). + ### 1.4 Contents (where to go from here) Section [WFA2-lib features](#wfa2.features) explores the most relevant options and features of the library. Then, the folder [tools/](tools/README.md) contains tools that can be used to execute and understand the WFA2 library capabilities. Additionally, the folder [examples/](examples/README.md) contains simple examples illustrating how to integrate the WFA2 code into any tool. @@ -40,7 +66,7 @@ Section [WFA2-lib features](#wfa2.features) explores the most relevant options a * [Technical notes](#wfa2.other.notes) * [Reporting Bugs and Feature Request](#wfa2.complains) * [License](#wfa2.licence) -* [Citation](#wfa2.cite) +* [Citation](#wfa2.cite) ### 1.5 Important notes and clarifications @@ -62,7 +88,7 @@ This simple example illustrates how to align two sequences using the WFA2 librar #include "wavefront/wavefront_align.h" ``` -Next, create and configure the WFA alignment object. The following example uses the defaults configuration and sets custom `gap_affine` penalties. Note that mismatch, gap-opening, and gap-extension must be positive values. +Next, create and configure the WFA alignment object. The following example uses the defaults configuration and sets custom `gap_affine` penalties. 
Note that mismatch, gap-opening, and gap-extension must be positive values. ```C // Configure alignment attributes @@ -142,10 +168,15 @@ cout << "Alignment score " << aligner.getAlignmentScore() << endl; **IMPORTANT.** Once an alignment object is created, **it is strongly recommended to reuse it to compute multiple alignments**. Creating and destroying the alignment object for every alignment computed can have a significant overhead. Reusing the alignment object allows repurposing internal data structures, minimising the cost of memory allocations, and avoiding multiple alignment setups and precomputations. +### 2.3 Rust bindings + +Rust bindings can be generated automatically using `bindgen`, see [bindings/rust/build.rs](bindings/rust/build.rs). +An example of how to use them is [here](./bindings/rust/example.rs). + ## 3. WFA2-LIB FEATURES * **Exact alignment** method that computes the optimal **alignment score** and/or **alignment CIGAR**. -* Supports **multiple distance metrics** (i.e., indel, edit, gap-lineal, gap-affine, and dual-gap gap-affine). +* Supports **multiple distance metrics** (i.e., indel, edit, gap-linear, gap-affine, and dual-gap gap-affine). * Allows performing **end-to-end** (a.k.a. global) and **ends-free** (e.g., semi-global, extension, overlap) alignment. * Implements **low-memory modes** to reduce and control memory consumption (down to `O(s)` using the `ultralow` mode). * Supports various **heuristic strategies** to use on top of the core WFA algorithm. @@ -156,7 +187,7 @@ cout << "Alignment score " << aligner.getAlignmentScore() << endl; The WFA2 library implements the wavefront algorithm for the most widely used distance metrics. The practical alignment time can change depending on the distance function, although the computational complexity always remains proportional to the alignment score or distance. 
The WFA2 library offers the following distance metrics or functions: -- **Indel (or LCS).** Produces alignments allowing matches, insertions, and deletions with unitary cost (i.e., {M,I,D} = {0,1,1}) but not mismatches. Also known as the longest common subsequence (LCS) problem. The LCS is defined as the longest subsequence common to both sequences, provided that the characters of the subsequence are not required to occupy consecutive positions within the original sequences. +- **Indel (or LCS).** Produces alignments allowing matches, insertions, and deletions with unitary cost (i.e., {M,I,D} = {0,1,1}) but not mismatches. Also known as the longest common subsequence (LCS) problem. The LCS is defined as the longest subsequence common to both sequences, provided that the characters of the subsequence are not required to occupy consecutive positions within the original sequences. ``` PATTERN A-GCTA-GTGTC--AATGGCTACT-T-T-TCAGGTCCT @@ -199,7 +230,7 @@ The WFA2 library implements the wavefront algorithm for the most widely used dis // Configuration wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; attributes.distance_metric = gap_linear; - attributes.linear_penalties.mismatch = 6; // X > 0 + attributes.linear_penalties.mismatch = 6; // X > 0 attributes.linear_penalties.indel = 2; // I > 0 ``` @@ -235,9 +266,9 @@ The WFA2 library implements the wavefront algorithm for the most widely used dis ### 3.2 Alignment Scope -Depending on the use case, it is often the case that an application is only required to compute the alignment score, not the complete alignment (i.e., CIGAR). As it happens with traditional dynamic programming algorithms, the WFA algorithm requires less memory (i.e., `O(s)`) to compute the alignment score. In turn, this results in slighter faster alignment executions. For this reason, the WFA2 library implements two different modes depending on the alignment scope: score-only and full-CIGAR alignment. 
+Depending on the use case, it is often the case that an application is only required to compute the alignment score, not the complete alignment (i.e., CIGAR). As it happens with traditional dynamic programming algorithms, the WFA algorithm requires less memory (i.e., `O(s)`) to compute the alignment score. In turn, this results in slighter faster alignment executions. For this reason, the WFA2 library implements two different modes depending on the alignment scope: score-only and full-CIGAR alignment. -The ** score-only alignment ** mode computes only the alignment score. This mode utilizes only the front-wavefronts of the WFA algorithm to keep track of the optimal alignment score. As a result, it requires `O(s)` memory and, in practice, performs slighter faster than the standard full-CIGAR mode. +The ** score-only alignment ** mode computes only the alignment score. This mode utilizes only the front-wavefronts of the WFA algorithm to keep track of the optimal alignment score. As a result, it requires `O(s)` memory and, in practice, performs slighter faster than the standard full-CIGAR mode. ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; @@ -273,7 +304,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt ``` PATTERN AATTAATTTAAGTCTAGGCTACTTTCGGTACTTTGTTCTT - |||||||||||||||||||||||||||||| || + |||||||||||||||||||||||||||||| || TEXT ----AATTTAAGTCTAGGCTACTTTCGGTACTTTCTT--- ``` @@ -295,7 +326,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt ``` PATTERN -------------AATTTAAGTCTAGGCTACTTTC--------------- - ||||||||| |||||||||||| + ||||||||| |||||||||||| TEXT ACGACTACTACGAAATTTAAGTATAGGCTACTTTCCGTACGTACGTACGT ``` @@ -324,9 +355,9 @@ The WFA2 library allows computing alignments with different spans or shapes. 
Alt attributes.alignment_form.pattern_end_free = pattern_end_free; attributes.alignment_form.text_begin_free = 0; attributes.alignment_form.text_end_free = text_end_free; - + PATTERN AATTTAAGTCTG-CTACTTTCACGCA-GCT---------- - ||||| |||||| ||||||||||| | | | + ||||| |||||| ||||||||||| | | | TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT ``` @@ -338,7 +369,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.pattern_end_free = 0; attributes.alignment_form.text_begin_free = text_begin_free; attributes.alignment_form.text_end_free = 0; - + PATTERN -------------AAACTTTCACGTACG-TGACAGTCTCT ||||||||||||| |||||| |||| TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT @@ -350,7 +381,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt
Overlapped alignment

-- **Overlapped alignment (a.k.a. dovetail).** +- **Overlapped alignment (a.k.a. dovetail).** ```C // Overlapped (Right-Left) @@ -360,9 +391,9 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.pattern_end_free = 0; attributes.alignment_form.text_begin_free = 0; attributes.alignment_form.text_end_free = text_end_free; - + PATTERN ACGCGTCTGACTGACTGACTAAACTTTCATGTAC-TGACA----------------- - ||||||||| |||| ||||| + ||||||||| |||| ||||| TEXT --------------------AAACTTTCACGTACGTGACATATAGCGATCGATGACT ``` @@ -376,7 +407,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.text_end_free = 0; PATTERN ----------------------ACGCGTCTGACTGACTACGACTACGACTGACTAGCAT - ||||||||| || || + ||||||||| || || TEXT ACATGCATCGATCAGACTGACTACGCGTCTG-CTAAC---------------------- ``` @@ -389,14 +420,16 @@ The WFA2 library implements various memory modes: `wavefront_memory_high`, `wave ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; - attributes.memory_mode = wavefront_memory_med; + attributes.memory_mode = wavefront_memory_ultralow; ``` ### 3.5 Heuristic modes The WFA algorithm can be used combined with many heuristics to reduce the alignment time and memory used. As it happens to other alignment methods, heuristics can result in suboptimal solutions and loss of accuracy. Moreover, some heuristics may drop the alignment if the sequences exceed certain divergence thresholds (i.e., x-drop/z-drop). Due to the popularity and efficiency of these methods, the WFA2 library implements many of these heuristics. Note, **it is not about how little DP-matrix you compute, but about how good/accurate the resulting alignments are.** -- **None (for comparison)**. If no heuristic is used, the WFA behaves exploring cells of the DP-matrix in increasing score order (increasing scores correspond to colours from blue to red). 
+WFA2's heuristics are classified into the following categories: ['wf-adaptive'](#wfa2.wfadaptive), ['drops'](#wfa2.drops), and ['bands'](#wfa2.bands). It is possible to combine a maximum of one heuristic from each category (OR-ing the strategy values or using the API). In the case of using multiple heuristics, these will be applied in cascade, starting with 'wf-adaptive', then 'drops', and finally 'bands'. + +- **None (for comparison)**. If no heuristic is used, the WFA behaves exploring cells of the DP-matrix in increasing score order (increasing scores correspond to colours from blue to red).

@@ -414,43 +447,83 @@ The WFA algorithm can be used combined with many heuristics to reduce the alignm attributes.heuristic.strategy = wf_heuristic_none; ``` -- **Banded alignment.** Sets a fixed band in the diagonals preventing the wavefront from growing beyond those limits. It allows setting the minimum diagonal (i.e., min_k) and maximum diagonal (i.e., max_k). +- **Heuristic wf-adaptive.** This WFA heuristic removes outer diagonals that are extremely far behind compared to other ones in the same wavefront. Unlike other methods, the adaptive-wavefront reduction heuristic prunes based on the potential of the diagonal to lead to the optimal solution without previous knowledge of the error between the sequences. + +```C + wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; + attributes.heuristic.strategy = wf_heuristic_wfadaptive; + attributes.heuristic.min_wavefront_length = 10; + attributes.heuristic.max_distance_threshold = 50; + attributes.heuristic.steps_between_cutoffs = 1; +``` + +        **Graphical examples:**

- - + + - - + +

Banded(10,10)

Banded(10,150)

Adaptive-WF(10,50)

Adaptive-WF(10,50,10)

+- **Heuristic drops.** This heuristic compares the maximum score computed so far with the score of the last computed cells. Depending on the score difference, these heuristic strategies can reduce the size of the wavefront computed or even abandon the alignment process. In the case of zero-match alignment, $M=1$ will be assumed just for computation of the score drop. Also note that this heuristic is not compatible with distances 'edit' or 'indel'. In this category, WFA2 implements 'X-drop' and 'Z-drop'. + +        **X-drop** implements the classical X-drop heuristic. For each diagonal $k$, the X-drop heuristic compares the current score $sw_k$ with the maximum observed score so far $sw_{max}$. If the difference drops more than the $xdrop$ parameter (i.e., $sw_{max} - sw_k > xdrop$), the heuristic prunes the diagonal $k$ as it is unlikely to lead to the optimum alignment. If all the diagonals are pruned under this criteria, the alignment process is abandoned. + ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; - attributes.heuristic.strategy = wf_heuristic_banded_static; - attributes.heuristic.min_k = -10; - attributes.heuristic.max_k = +10; + attributes.heuristic.strategy = wf_heuristic_xdrop; + attributes.heuristic.xdrop = 100; + attributes.heuristic.steps_between_cutoffs = 100; ``` -- **Adaptive-Band alignment.** Similar to the static-band heuristic, it allows the band to move towards the diagonals closer to the end of the alignment. Unlike the static-band that is performed on each step, the adaptive-band heuristics allows configuring the number of steps between heuristic band cut-offs. +        **Z-drop** implements the Z-drop heuristic (as described in Minimap2). This heuristic halts the alignment process if the score drops too fast in the diagonal direction. Let $sw_{max}$ be the maximum observed score so far, computed at cell $(i',j')$. 
Then, let $sw$ be the maximum score found in the last computed wavefront, computed at cell $(i,j)$. The Z-drop heuristic stops the alignment process if $sw_{max} - sw > zdrop + gap_e·|(i-i')-(j-j')|$, being $gap_e$ the gap-extension penalty and $zdrop$ a parameter of the heuristic. + + +```C + wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; + attributes.heuristic.strategy = wf_heuristic_zdrop; + attributes.heuristic.zdrop = 100; + attributes.heuristic.steps_between_cutoffs = 100; +``` + +        **Graphical examples:**

- - + + + - - + + +

Adaptive-Band(10,10,1)

Adaptive-Band(50,50,1)

None

X-drop(200,1)

Z-drop(200,1)

+ +- **Heuristic bands.** These heuristics set a band in the diagonals preventing the wavefront from growing beyond those limits. It allows setting the minimum diagonal (i.e., min_k) and maximum diagonal (i.e., max_k). These heuristics are the most restrictive but the fastest and simplest to compute. In this category, WFA2 implements 'static-band' and 'adaptive-band'. + +        **Static-band** sets a fixed band in the diagonals preventing the wavefront from growing beyond those limits. It allows setting the minimum diagonal (i.e., min_k) and maximum diagonal (i.e., max_k). + +```C + wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; + attributes.heuristic.strategy = wf_heuristic_banded_static; + attributes.heuristic.min_k = -10; + attributes.heuristic.max_k = +10; +``` + +        **Adaptive-band** is similar to the static-band heuristic; however, it allows the band to move towards the diagonals closer to the end of the alignment. Unlike the static-band that is performed on each step, the adaptive-band heuristics allows configuring the number of steps between heuristic band cut-offs. + ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; attributes.heuristic.strategy = wf_heuristic_banded_adaptive; @@ -459,52 +532,42 @@ The WFA algorithm can be used combined with many heuristics to reduce the alignm attributes.heuristic.steps_between_cutoffs = 1; ``` -- **Adaptive-Wavefront alignment.** This WFA heuristic removes outer diagonals that are extremely far behind compared to other ones in the same wavefront. Unlike other methods, the adaptive-wavefront reduction heuristic prunes based on the potential of the diagonal to lead to the optimal solution without previous knowledge of the error between the sequences. +        **Graphical examples:**

- - + + - - + + + + + + + + + +

Adaptive-WF(10,50)

Adaptive-WF(10,50,10)

Banded(10,10)

Banded(10,150)

Adaptive-Band(10,10,1)

Adaptive-Band(50,50,1)

-```C - wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; - attributes.heuristic.strategy = wf_heuristic_wfadaptive; - attributes.heuristic.min_wavefront_length = 10; - attributes.heuristic.max_distance_threshold = 50; - attributes.heuristic.steps_between_cutoffs = 1; -``` -- **X-drop.** [Under Testing] Implements the classical X-drop heuristic to abandon diagonals (or even alignments) that fall more than X from the previous best-observed score. +### 3.6 Some technical notes -```C - wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; - attributes.heuristic.strategy = wf_heuristic_xdrop; - attributes.heuristic.xdrop = 100; - attributes.heuristic.steps_between_cutoffs = 100; -``` +- Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. In practice, M=0 is often the most efficient choice. -- **Z-drop**. [Under Testing] Implements the Z-drop heuristic. It drops the diagonals (or even the alignment) if the score drops too fast in the diagonal direction. -```C - wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; - attributes.heuristic.strategy = wf_heuristic_zdrop; - attributes.heuristic.zdrop = 100; - attributes.heuristic.steps_between_cutoffs = 100; -``` +- Note that edit and LCS are distance metrics and, thus, the score computed is always positive. However, using weighted distances (e.g., gap-linear and gap-affine) the alignment score is computed using the selected penalties (i.e., the alignment score can be positive or negative). For instance, if WFA2-lib is executed using $M=0$, the final score is expected to be negative. -### 3.6 Some technical notes -- Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. Although, in practice, M=0 is still the most efficient choice. +- All WFA2-lib algorithms/variants are stable. 
That is, for alignments having the same score, the all alignment modes always resolve ties (between M, X, I,and D) using the same criteria: M (highest prio) > X > D > I (lowest prio). Only the memory mode `ultralow` (BiWFA) resolves ties differently (although the results are still optimal). -- All WFA2-lib algorithms/variants are stable. That is, for alignments having the same score, the library always resolves ties (between M, X, I,and D) using the same criteria: M (highest prio) > X > D > I (lowest prio). Nevertheless, the memory mode `ultralow` (BiWFA) is optimal (always reports the best alignment) but not stable. + +- WFA2lib follows the convention that describes how to transform the (1) Pattern/Query into the (2) Text/Database/Reference used in classic pattern matching papers. However, the SAM CIGAR specification describes the transformation from (2) Reference to (1) Query. If you want CIGAR-compliant alignments, swap the pattern and text sequences argument when calling the WFA2lib's align functions (to convert all the Ds into Is and vice-versa). ## 4. REPORTING BUGS AND FEATURE REQUEST @@ -523,7 +586,9 @@ WFA2-lib is distributed under MIT licence. [Santiago Marco-Sola](https://github.com/smarco) (santiagomsola@gmail.com) is the main developer and the person you should address your complaints. -[Andrea Guarracino](https://github.com/AndreaGuarracino) and [Erik Garrison](https://github.com/ekg) have contributed to the design of new features and intensive testing of this library. +[Andrea Guarracino](https://github.com/AndreaGuarracino) and [Erik Garrison](https://github.com/ekg) have contributed to the design of new features and intensive testing of the library. + +[Pjotr Prins](https://thebird.nl/) contributed the CMake build system, preventing of leaking variables in include headers and other tweaks. Miquel Moretó has contributed with fruitful technical discussions and tireless efforts seeking funding, so we could keep working on this project. 
@@ -535,7 +600,6 @@ Miquel Moretó has contributed with fruitful technical discussions and tireless ## 8. CITATION -**Santiago Marco-Sola, Juan Carlos Moure, Miquel Moreto, Antonio Espinosa**. ["Fast gap-affine pairwise alignment using the wavefront algorithm."](https://doi.org/10.1093/bioinformatics/btaa777) Bioinformatics, 2020. - -**Santiago Marco-Sola, Jordan M Eizenga, Andrea Guarracino, Benedict Paten, Erik Garrison, Miquel Moreto**. Optimal gap-affine alignment in O(s) space. _bioRxiv_ (2022). DOI [2022.04.14.488380](https://doi.org/10.1101/2022.04.14.488380) +**Santiago Marco-Sola, Juan Carlos Moure, Miquel Moreto, Antonio Espinosa**. ["Fast gap-affine pairwise alignment using the wavefront algorithm."](https://doi.org/10.1093/bioinformatics/btaa777). Bioinformatics, 2020. +**Santiago Marco-Sola, Jordan M Eizenga, Andrea Guarracino, Benedict Paten, Erik Garrison, Miquel Moreto**. ["Optimal gap-affine alignment in O(s) space"](https://doi.org/10.1093/bioinformatics/btad074). Bioinformatics, 2023. 
diff --git a/pywfa/WFA2_lib/VERSION b/pywfa/WFA2_lib/VERSION deleted file mode 100644 index eb41430..0000000 --- a/pywfa/WFA2_lib/VERSION +++ /dev/null @@ -1 +0,0 @@ -v2.2 diff --git a/pywfa/WFA2_lib/VERSION.txt b/pywfa/WFA2_lib/VERSION.txt new file mode 100644 index 0000000..5c69a91 --- /dev/null +++ b/pywfa/WFA2_lib/VERSION.txt @@ -0,0 +1 @@ +v2.3 \ No newline at end of file diff --git a/pywfa/WFA2_lib/alignment/affine2p_penalties.h b/pywfa/WFA2_lib/alignment/affine2p_penalties.h index 8b66071..50e0b18 100644 --- a/pywfa/WFA2_lib/alignment/affine2p_penalties.h +++ b/pywfa/WFA2_lib/alignment/affine2p_penalties.h @@ -32,8 +32,6 @@ #ifndef AFFINE2P_PENALTIES_H_ #define AFFINE2P_PENALTIES_H_ -#include "utils/commons.h" - /* * Affine 2-piece penalties */ diff --git a/pywfa/WFA2_lib/alignment/affine_penalties.h b/pywfa/WFA2_lib/alignment/affine_penalties.h index 3d0f638..1306a62 100644 --- a/pywfa/WFA2_lib/alignment/affine_penalties.h +++ b/pywfa/WFA2_lib/alignment/affine_penalties.h @@ -32,8 +32,6 @@ #ifndef AFFINE_PENALTIES_H_ #define AFFINE_PENALTIES_H_ -#include "utils/commons.h" - /* * Affine penalties */ diff --git a/pywfa/WFA2_lib/alignment/cigar.c b/pywfa/WFA2_lib/alignment/cigar.c index 1ad1d45..dea0c12 100644 --- a/pywfa/WFA2_lib/alignment/cigar.c +++ b/pywfa/WFA2_lib/alignment/cigar.c @@ -29,29 +29,64 @@ * DESCRIPTION: Edit cigar data-structure (match/mismatch/insertion/deletion) */ +#include "utils/commons.h" #include "cigar.h" +/* + * SAM CIGAR Operations + */ +#define SAM_CIGAR_MATCH 0 +#define SAM_CIGAR_INS 1 +#define SAM_CIGAR_DEL 2 +#define SAM_CIGAR_N_SKIP 3 +#define SAM_CIGAR_EQ 7 +#define SAM_CIGAR_X 8 +/* ... */ +#define SAM_CIGAR_NA 15 + +const uint8_t sam_cigar_lut[256] = +{ + [0 ... 
255] = SAM_CIGAR_NA, + ['M'] = SAM_CIGAR_MATCH, + ['I'] = SAM_CIGAR_INS, + ['D'] = SAM_CIGAR_DEL, + ['N'] = SAM_CIGAR_N_SKIP, + ['='] = SAM_CIGAR_EQ, + ['X'] = SAM_CIGAR_X, +}; + /* * Setup */ -void cigar_allocate( - cigar_t* const cigar, - const int max_operations, - mm_allocator_t* const mm_allocator) { - // Allocate buffer +cigar_t* cigar_new( + const int max_operations) { + // Allocate + cigar_t* const cigar = malloc(sizeof(cigar_t)); + // Allocate alignment-operations buffer cigar->max_operations = max_operations; - cigar->operations = mm_allocator_malloc(mm_allocator,cigar->max_operations); + cigar->operations = malloc(cigar->max_operations); cigar->begin_offset = 0; cigar->end_offset = 0; cigar->score = INT32_MIN; - // MM - cigar->mm_allocator = mm_allocator; + cigar->end_v = -1; + cigar->end_h = -1; + // CIGAR + cigar->cigar_length = 0; + cigar->cigar_buffer = calloc(max_operations,sizeof(uint32_t)); + // Return + return cigar; } void cigar_clear( cigar_t* const cigar) { + // Alignment operations cigar->begin_offset = 0; cigar->end_offset = 0; + // Score and end position cigar->score = INT32_MIN; + cigar->end_v = -1; + cigar->end_h = -1; + // CIGAR + cigar->cigar_length = 0; } void cigar_resize( cigar_t* const cigar, @@ -59,22 +94,27 @@ void cigar_resize( // Check maximum operations if (max_operations > cigar->max_operations) { cigar->max_operations = max_operations; - mm_allocator_free(cigar->mm_allocator,cigar->operations); // Free - cigar->operations = mm_allocator_malloc( - cigar->mm_allocator,max_operations); // Allocate + free(cigar->operations); // Free + free(cigar->cigar_buffer); // Free + cigar->operations = malloc(max_operations); // Allocate + cigar->cigar_buffer = calloc(max_operations,sizeof(uint32_t)); // Allocate } - cigar->begin_offset = 0; - cigar->end_offset = 0; - cigar->score = INT32_MIN; + cigar_clear(cigar); } void cigar_free( cigar_t* const cigar) { - mm_allocator_free(cigar->mm_allocator,cigar->operations); + 
free(cigar->operations); + free(cigar->cigar_buffer); + free(cigar); } /* * Accessors */ -int cigar_get_matches( +bool cigar_is_null( + cigar_t* const cigar) { + return (cigar->begin_offset >= cigar->end_offset); +} +int cigar_count_matches( cigar_t* const cigar) { int i, num_matches=0; for (i=cigar->begin_offset;iend_offset;++i) { @@ -82,42 +122,121 @@ int cigar_get_matches( } return num_matches; } -void cigar_add_mismatches( - char* const pattern, - const int pattern_length, - char* const text, - const int text_length, - cigar_t* const cigar) { - // Refine adding mismatches - int i, p=0, t=0; - for (i=cigar->begin_offset;iend_offset;++i) { - // Check limits - if (p >= pattern_length || t >= text_length) break; - switch (cigar->operations[i]) { - case 'M': - cigar->operations[i] = (pattern[p]==text[t]) ? 'M' : 'X'; - ++p; ++t; - break; - case 'I': - ++t; - break; - case 'D': - ++p; - break; - default: - fprintf(stderr,"[CIGAR] Wrong edit operation\n"); - exit(1); - break; +void cigar_append_forward( + cigar_t* const cigar_dst, + cigar_t* const cigar_src) { + // Parameters + const int cigar_length = cigar_src->end_offset - cigar_src->begin_offset; + char* const operations_src = cigar_src->operations + cigar_src->begin_offset; + char* const operations_dst = cigar_dst->operations + cigar_dst->end_offset; + // Append forward + memcpy(operations_dst,operations_src,cigar_length); + // Update offset + cigar_dst->end_offset += cigar_length; +} +void cigar_append_reverse( + cigar_t* const cigar_dst, + cigar_t* const cigar_src) { + // Parameters + const int begin_offset = cigar_src->begin_offset; + const int end_offset = cigar_src->end_offset; + const int cigar_length = end_offset - begin_offset; + char* const operations_src = cigar_src->operations + begin_offset; + char* const operations_dst = cigar_dst->operations + cigar_dst->end_offset; + // Append reverse + int i; + for (i=0;iend_offset += cigar_length; +} +void cigar_append_deletion( + cigar_t* const cigar, + const 
int length) { + // Append deletions + char* const operations = cigar->operations + cigar->end_offset; + int i; + for (i=0;iend_offset += length; +} +void cigar_append_insertion( + cigar_t* const cigar, + const int length) { + // Append insertions + char* const operations = cigar->operations + cigar->end_offset; + int i; + for (i=0;iend_offset += length; +} +/* + * SAM-compliant CIGAR + */ +void cigar_compute_CIGAR( + cigar_t* const cigar, + const bool show_mismatches) { + // Prepare CIGAR (SAM compliant) + if (cigar->cigar_length==0 || cigar->has_misms!=show_mismatches) { + const char* const operations = cigar->operations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; + // Check null CIGAR + if (begin_offset >= end_offset) { + cigar->cigar_length = 0; + return; + } + // Generate CIGAR + uint32_t* const cigar_buffer = cigar->cigar_buffer; + int cigar_length = 0; + char last_op = operations[begin_offset]; + uint32_t last_op_len = 1; + int i; + for (i=begin_offset+1;ihas_misms = show_mismatches; + cigar->cigar_length = cigar_length; } - while (p < pattern_length) { cigar->operations[i++] = 'D'; ++p; }; - while (t < text_length) { cigar->operations[i++] = 'I'; ++t; }; - cigar->end_offset = i; - cigar->operations[cigar->end_offset] = '\0'; - // // DEBUG - // printf("Score=%ld\nPath-length=%" PRIu64 "\nCIGAR=%s\n", - // gaba_alignment->score,gaba_alignment->plen, - // cigar->operations); +} +void cigar_get_CIGAR( + cigar_t* const cigar, + const bool show_mismatches, + uint32_t** const cigar_buffer, + int* const cigar_length) { + // Compute CIGAR + cigar_compute_CIGAR(cigar,show_mismatches); + // Return + *cigar_buffer = cigar->cigar_buffer; + *cigar_length = cigar->cigar_length; } /* * Score @@ -131,7 +250,9 @@ int cigar_score_edit( case 'X': case 'D': case 'I': ++score; break; - default: return INT_MIN; + default: + fprintf(stderr,"[CIGAR] Computing CIGAR score: Unknown operation\n"); + exit(1); } } return score; @@ -146,7 
+267,9 @@ int cigar_score_gap_linear( case 'X': score -= penalties->mismatch; break; case 'I': score -= penalties->indel; break; case 'D': score -= penalties->indel; break; - default: return INT_MIN; + default: + fprintf(stderr,"[CIGAR] Computing CIGAR score: Unknown operation\n"); + exit(1); } } return score; @@ -178,7 +301,7 @@ int cigar_score_gap_affine( } return score; } -int cigar_score_gap_affine2p_get_operations_score( +int cigar_score_gap_affine2p_score_op( const char operation, const int length, affine2p_penalties_t* const penalties) { @@ -207,15 +330,14 @@ int cigar_score_gap_affine2p( for (i=cigar->begin_offset;iend_offset;++i) { // Account for operation if (cigar->operations[i] != last_op && last_op != '\0') { - score -= cigar_score_gap_affine2p_get_operations_score(last_op,op_length,penalties); + score -= cigar_score_gap_affine2p_score_op(last_op,op_length,penalties); op_length = 0; } - // Add operation last_op = cigar->operations[i]; ++op_length; } // Account for last operation - score -= cigar_score_gap_affine2p_get_operations_score(last_op,op_length,penalties); + score -= cigar_score_gap_affine2p_score_op(last_op,op_length,penalties); return score; } /* @@ -251,41 +373,247 @@ void cigar_copy( cigar_src->operations+cigar_src->begin_offset, cigar_src->end_offset-cigar_src->begin_offset); } -void cigar_append( - cigar_t* const cigar_dst, - cigar_t* const cigar_src) { - // Append - const int cigar_length = cigar_src->end_offset - cigar_src->begin_offset; - char* const operations_src = cigar_src->operations + cigar_src->begin_offset; - char* const operations_dst = cigar_dst->operations + cigar_dst->end_offset; - memcpy(operations_dst,operations_src,cigar_length); - // Update offset - cigar_dst->end_offset += cigar_length; +void cigar_discover_mismatches( + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + cigar_t* const cigar) { + // Refine adding mismatches + int i, p=0, t=0; + for 
(i=cigar->begin_offset;iend_offset;++i) { + // Check limits + if (p >= pattern_length || t >= text_length) break; + switch (cigar->operations[i]) { + case 'M': + cigar->operations[i] = (pattern[p]==text[t]) ? 'M' : 'X'; + ++p; ++t; + break; + case 'I': + ++t; + break; + case 'D': + ++p; + break; + default: + fprintf(stderr,"[CIGAR] Wrong edit operation\n"); + exit(1); + break; + } + } + while (p < pattern_length) { cigar->operations[i++] = 'D'; ++p; }; + while (t < text_length) { cigar->operations[i++] = 'I'; ++t; }; + cigar->end_offset = i; + cigar->operations[cigar->end_offset] = '\0'; + // // DEBUG + // printf("Score=%ld\nPath-length=%" PRIu64 "\nCIGAR=%s\n", + // gaba_alignment->score,gaba_alignment->plen, + // cigar->operations); } -void cigar_append_deletion( +/* + * Maxtrim + * Reduce the CIGAR to the maximal scoring sequence, starting from + * the beginning, under a given distance function + * + */ +bool cigar_maxtrim_gap_linear( cigar_t* const cigar, - const int length) { - // Append deletions - char* const operations = cigar->operations + cigar->end_offset; - int i; - for (i=0;ioperations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; + const int match_score = (penalties->match!=0) ? 
penalties->match : -1; + // Max-score + int max_score = 0, max_score_offset = begin_offset, max_end_v = 0, max_end_h = 0; + // Traverse all cigar + int score = 0, end_v = 0, end_h = 0, i; + for (i=begin_offset;imismatch; + ++end_v; ++end_h; + break; + case 'I': + score -= penalties->indel; + ++end_h; + break; + case 'D': + score -= penalties->indel; + ++end_v; + break; + } + // Compare max + if (max_score < score) { + max_score = score; + max_score_offset = i; + max_end_v = end_v; + max_end_h = end_h; + } } - // Update offset - cigar->end_offset += length; + // Keep the max-scoring part of the cigar + const bool cigar_trimmed = (max_score_offset != end_offset-1); + if (max_score == 0) { + cigar_clear(cigar); + } else { + cigar->operations[max_score_offset+1] = '\0'; + cigar->end_offset = max_score_offset + 1; + cigar->score = max_score; + cigar->end_v = max_end_v; + cigar->end_h = max_end_h; + } + // Return + return cigar_trimmed; } -void cigar_append_insertion( +bool cigar_maxtrim_gap_affine( cigar_t* const cigar, - const int length) { - // Append insertions - char* const operations = cigar->operations + cigar->end_offset; + affine_penalties_t* const penalties) { + // Parameters + const char* const operations = cigar->operations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; + const int match_score = (penalties->match!=0) ? penalties->match : -1; + // Max-score + int max_score = 0, max_score_offset = begin_offset, max_end_v = 0, max_end_h = 0; + // Traverse all cigar + char last_op = '\0'; + int score = 0, end_v = 0, end_h = 0, i; + for (i=begin_offset;imismatch; + ++end_v; ++end_h; + break; + case 'I': + score -= penalties->gap_extension + ((last_op=='I') ? 0 : penalties->gap_opening); + ++end_h; + break; + case 'D': + score -= penalties->gap_extension + ((last_op=='D') ? 
0 : penalties->gap_opening); + ++end_v; + break; + } + last_op = operations[i]; + // Compare max + if (max_score < score) { + max_score = score; + max_score_offset = i; + max_end_v = end_v; + max_end_h = end_h; + } + } + // Keep the max-scoring part of the cigar + const bool cigar_trimmed = (max_score_offset != end_offset-1); + if (max_score == 0) { + cigar_clear(cigar); + } else { + cigar->operations[max_score_offset+1] = '\0'; + cigar->end_offset = max_score_offset + 1; + cigar->score = max_score; + cigar->end_v = max_end_v; + cigar->end_h = max_end_h; + } + // Return + return cigar_trimmed; +} +int cigar_maxtrim_gap_affine2p_score_op( + const char operation, + const int length, + affine2p_penalties_t* const penalties, + int* const end_v, + int* const end_h) { + switch (operation) { + case 'M': { + *end_v += length; *end_h += length; + const int match_score = (penalties->match!=0) ? penalties->match : -1; + return match_score*length; + } + case 'X': + *end_v += length; *end_h += length; + return penalties->mismatch*length; + case 'D': { + *end_v += length; + const int score1 = penalties->gap_opening1 + penalties->gap_extension1*length; + const int score2 = penalties->gap_opening2 + penalties->gap_extension2*length; + return MIN(score1,score2); + } + case 'I': { + *end_h += length; + const int score1 = penalties->gap_opening1 + penalties->gap_extension1*length; + const int score2 = penalties->gap_opening2 + penalties->gap_extension2*length; + return MIN(score1,score2); + } + default: + fprintf(stderr,"[CIGAR] Computing CIGAR score: Unknown operation\n"); + exit(1); + } +} +bool cigar_maxtrim_gap_affine2p( + cigar_t* const cigar, + affine2p_penalties_t* const penalties) { + // Parameters + const char* const operations = cigar->operations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; + if (begin_offset >= end_offset) return false; + // Max-score + int max_score = 0, max_score_offset = begin_offset, max_end_v = 0, 
max_end_h = 0; + // Traverse all cigar + char last_op = '\0'; + int score = 0, end_v = 0, end_h = 0, op_length = 0; int i; - for (i=0;iend_offset += length; + // Account for last operation + score -= cigar_maxtrim_gap_affine2p_score_op(last_op,op_length,penalties,&end_v,&end_h); + if (max_score < score) { + max_score = score; + max_score_offset = end_offset - 1; + max_end_v = end_v; + max_end_h = end_h; + } + // Keep the max-scoring part of the cigar + const bool cigar_trimmed = (max_score_offset != end_offset-1); + if (max_score == 0) { + cigar_clear(cigar); + } else { + cigar->operations[max_score_offset+1] = '\0'; + cigar->end_offset = max_score_offset + 1; + cigar->score = max_score; + cigar->end_v = max_end_v; + cigar->end_h = max_end_h; + } + // Return + return cigar_trimmed; } +/* + * Check + */ bool cigar_check_alignment( FILE* const stream, const char* const pattern, @@ -305,7 +633,7 @@ bool cigar_check_alignment( if (pattern[pattern_pos] != text[text_pos]) { if (verbose) { fprintf(stream, - "[AlignCheck] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", + "[CIGAR] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); } return false; @@ -318,7 +646,7 @@ bool cigar_check_alignment( if (pattern[pattern_pos] == text[text_pos]) { if (verbose) { fprintf(stream, - "[AlignCheck] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", + "[CIGAR] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); } return false; @@ -333,7 +661,7 @@ bool cigar_check_alignment( ++pattern_pos; break; default: - fprintf(stderr,"[AlignCheck] Unknown edit operation '%c'\n",operations[i]); + fprintf(stream,"[CIGAR] Unknown edit operation '%c'\n",operations[i]); exit(1); break; } @@ -342,7 +670,7 @@ bool cigar_check_alignment( if (pattern_pos != pattern_length) { if (verbose) { fprintf(stream, - "[AlignCheck] Alignment incorrect length 
(pattern-aligned=%d,pattern-length=%d)\n", + "[CIGAR] Alignment incorrect length (pattern-aligned=%d,pattern-length=%d)\n", pattern_pos,pattern_length); } return false; @@ -350,7 +678,7 @@ bool cigar_check_alignment( if (text_pos != text_length) { if (verbose) { fprintf(stream, - "[AlignCheck] Alignment incorrect length (text-aligned=%d,text-length=%d)\n", + "[CIGAR] Alignment incorrect length (text-aligned=%d,text-length=%d)\n", text_pos,text_length); } return false; @@ -365,78 +693,108 @@ void cigar_print( FILE* const stream, cigar_t* const cigar, const bool print_matches) { - // Check null CIGAR - if (cigar->begin_offset >= cigar->end_offset) return; - // Print operations - char last_op = cigar->operations[cigar->begin_offset]; - int last_op_length = 1; - int i; - for (i=cigar->begin_offset+1;iend_offset;++i) { - if (cigar->operations[i]==last_op) { - ++last_op_length; - } else { - if (print_matches || last_op != 'M') { - fprintf(stream,"%d%c",last_op_length,last_op); - } - last_op = cigar->operations[i]; - last_op_length = 1; - } - } - if (print_matches || last_op != 'M') { - fprintf(stream,"%d%c",last_op_length,last_op); - } + // Check null + if (cigar_is_null(cigar)) return; + // Generate and print operations + char* const buffer = malloc(2*(cigar->end_offset-cigar->begin_offset)+10); + cigar_sprint(buffer,cigar,print_matches); + fprintf(stream,"%s",buffer); // Print + // Free + free(buffer); } int cigar_sprint( - char* buffer, + char* const buffer, cigar_t* const cigar, const bool print_matches) { - // Parameters - int pos = 0; - // Check null CIGAR - if (cigar->begin_offset >= cigar->end_offset) { - buffer[pos] = '\0'; - return pos; + // Check null + if (cigar_is_null(cigar)) { + buffer[0] = '\0'; + return 0; } + // Parameters + const char* const operations = cigar->operations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; // Print operations - char last_op = cigar->operations[cigar->begin_offset]; + char 
last_op = operations[begin_offset]; int last_op_length = 1; - int i; - for (i=cigar->begin_offset+1;iend_offset;++i) { - if (cigar->operations[i]==last_op) { + int i, cursor = 0; + for (i=begin_offset+1;ioperations[i]; + last_op = operations[i]; last_op_length = 1; } } if (print_matches || last_op != 'M') { - pos += sprintf(buffer+pos,"%d%c",last_op_length,last_op); + cursor += sprintf(buffer+cursor,"%d%c",last_op_length,last_op); + } + // Return + buffer[cursor] = '\0'; + return cursor; +} +void cigar_print_SAM_CIGAR( + FILE* const stream, + cigar_t* const cigar, + const bool show_mismatches) { + // Check null + if (cigar_is_null(cigar)) return; + // Generate and print operations + char* const buffer = malloc(2*(cigar->end_offset-cigar->begin_offset)); + cigar_sprint_SAM_CIGAR(buffer,cigar,show_mismatches); + fprintf(stream,"%s",buffer); // Print + // Free + free(buffer); +} +int cigar_sprint_SAM_CIGAR( + char* const buffer, + cigar_t* const cigar, + const bool show_mismatches) { + // Get SAM CIGAR + uint32_t* cigar_buffer; + int cigar_length; + cigar_get_CIGAR(cigar,show_mismatches,&cigar_buffer,&cigar_length); + // Print CIGAR-operations + int i, cursor = 0; + for (i=0;i>4, + "MIDN---=X"[cigar_buffer[i]&0xf]); + } else { + cursor += sprintf(buffer+cursor,"%d%c", + cigar_buffer[i]>>4,'?'); + } } // Return - buffer[pos] = '\0'; - return pos; + buffer[cursor] = '\0'; + return cursor; } void cigar_print_pretty( FILE* const stream, + cigar_t* const cigar, const char* const pattern, const int pattern_length, const char* const text, - const int text_length, - cigar_t* const cigar, - mm_allocator_t* const mm_allocator) { + const int text_length) { // Parameters char* const operations = cigar->operations; + const int begin_offset = cigar->begin_offset; + const int end_offset = cigar->end_offset; // Allocate alignment buffers - const int max_buffer_length = text_length+pattern_length+1; - char* const pattern_alg = 
mm_allocator_calloc(mm_allocator,max_buffer_length,char,true); - char* const ops_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true); - char* const text_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true); + const int max_buffer_length = text_length + pattern_length + 1; + char* const mem = calloc(3*max_buffer_length,1); + char* const pattern_alg = mem; + char* const ops_alg = pattern_alg + max_buffer_length; + char* const text_alg = ops_alg + max_buffer_length; // Compute alignment buffers int i, alg_pos = 0, pattern_pos = 0, text_pos = 0; - for (i=cigar->begin_offset;iend_offset;++i) { + for (i=begin_offset;i #include "system/mm_allocator.h" #include "alignment/linear_penalties.h" #include "alignment/affine_penalties.h" @@ -42,24 +42,26 @@ * CIGAR */ typedef struct { - // Operations buffer - char* operations; - int max_operations; - int begin_offset; - int end_offset; - // Score - int score; - // MM - mm_allocator_t* mm_allocator; + // Alignment operations + char* operations; // Raw alignment operations + int max_operations; // Maximum buffer size + int begin_offset; // Begin offset + int end_offset; // End offset + // Score and end position (useful for partial alignments like Z-dropped) + int score; // Computed scored + int end_v; // Alignment-end vertical coordinate (pattern characters aligned) + int end_h; // Alignment-end horizontal coordinate (text characters aligned) + // CIGAR (SAM compliant) + bool has_misms; // Show 'X' and '=', instead of just 'M' + uint32_t* cigar_buffer; // CIGAR-operations (max_operations length) + int cigar_length; // Total CIGAR-operations } cigar_t; /* * Setup */ -void cigar_allocate( - cigar_t* const cigar, - const int max_operations, - mm_allocator_t* const mm_allocator); +cigar_t* cigar_new( + const int max_operations); void cigar_clear( cigar_t* const cigar); void cigar_resize( @@ -71,15 +73,35 @@ void cigar_free( /* * Accessors */ -int cigar_get_matches( +bool cigar_is_null( cigar_t* const 
cigar); -void cigar_add_mismatches( - char* const pattern, - const int pattern_length, - char* const text, - const int text_length, + +int cigar_count_matches( cigar_t* const cigar); +void cigar_append_forward( + cigar_t* const cigar_dst, + cigar_t* const cigar_src); +void cigar_append_reverse( + cigar_t* const cigar_dst, + cigar_t* const cigar_src); + +void cigar_append_deletion( + cigar_t* const cigar, + const int length); +void cigar_append_insertion( + cigar_t* const cigar, + const int length); + +/* + * SAM-compliant CIGAR + */ +void cigar_get_CIGAR( + cigar_t* const cigar, + const bool show_mismatches, + uint32_t** const cigar_buffer, + int* const cigar_length); + /* * Score */ @@ -105,16 +127,26 @@ void cigar_copy( cigar_t* const cigar_dst, cigar_t* const cigar_src); -void cigar_append( - cigar_t* const cigar_dst, - cigar_t* const cigar_src); -void cigar_append_deletion( +void cigar_discover_mismatches( + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + cigar_t* const cigar); + +bool cigar_maxtrim_gap_linear( cigar_t* const cigar, - const int length); -void cigar_append_insertion( + linear_penalties_t* const penalties); +bool cigar_maxtrim_gap_affine( cigar_t* const cigar, - const int length); + affine_penalties_t* const penalties); +bool cigar_maxtrim_gap_affine2p( + cigar_t* const cigar, + affine2p_penalties_t* const penalties); +/* + * Check + */ bool cigar_check_alignment( FILE* const stream, const char* const pattern, @@ -132,16 +164,25 @@ void cigar_print( cigar_t* const cigar, const bool print_matches); int cigar_sprint( - char* buffer, + char* const buffer, cigar_t* const cigar, const bool print_matches); + +void cigar_print_SAM_CIGAR( + FILE* const stream, + cigar_t* const cigar, + const bool show_mismatches); +int cigar_sprint_SAM_CIGAR( + char* const buffer, + cigar_t* const cigar, + const bool show_mismatches); + void cigar_print_pretty( FILE* const stream, + cigar_t* const cigar, const 
char* const pattern, const int pattern_length, const char* const text, - const int text_length, - cigar_t* const cigar, - mm_allocator_t* const mm_allocator); + const int text_length); #endif /* CIGAR_H_ */ diff --git a/pywfa/WFA2_lib/alignment/score_matrix.c b/pywfa/WFA2_lib/alignment/score_matrix.c index e6b7cae..beae531 100644 --- a/pywfa/WFA2_lib/alignment/score_matrix.c +++ b/pywfa/WFA2_lib/alignment/score_matrix.c @@ -29,6 +29,7 @@ * DESCRIPTION: Score matrix for alignment using dynamic programming */ +#include "utils/commons.h" #include "score_matrix.h" /* @@ -114,7 +115,3 @@ void score_matrix_print( } fprintf(stream,"\n"); } - - - - diff --git a/pywfa/WFA2_lib/alignment/score_matrix.h b/pywfa/WFA2_lib/alignment/score_matrix.h index b6d4633..8abaf99 100644 --- a/pywfa/WFA2_lib/alignment/score_matrix.h +++ b/pywfa/WFA2_lib/alignment/score_matrix.h @@ -32,7 +32,6 @@ #ifndef SCORE_MATRIX_H_ #define SCORE_MATRIX_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" #include "alignment/cigar.h" diff --git a/pywfa/WFA2_lib/system/mm_allocator.c b/pywfa/WFA2_lib/system/mm_allocator.c index 3c5ff57..9683193 100644 --- a/pywfa/WFA2_lib/system/mm_allocator.c +++ b/pywfa/WFA2_lib/system/mm_allocator.c @@ -32,6 +32,7 @@ * and dispatching memory segments in order. */ +#include "utils/commons.h" #include "mm_allocator.h" /* diff --git a/pywfa/WFA2_lib/system/mm_allocator.h b/pywfa/WFA2_lib/system/mm_allocator.h index 1d2cac1..00390fa 100644 --- a/pywfa/WFA2_lib/system/mm_allocator.h +++ b/pywfa/WFA2_lib/system/mm_allocator.h @@ -35,6 +35,7 @@ #ifndef MM_ALLOCATOR_H_ #define MM_ALLOCATOR_H_ +#include #include "utils/vector.h" /* diff --git a/pywfa/WFA2_lib/system/mm_stack.c b/pywfa/WFA2_lib/system/mm_stack.c index eb57a5f..abb86d2 100644 --- a/pywfa/WFA2_lib/system/mm_stack.c +++ b/pywfa/WFA2_lib/system/mm_stack.c @@ -32,6 +32,7 @@ * requested at once. 
*/ +#include "utils/commons.h" #include "mm_stack.h" /* diff --git a/pywfa/WFA2_lib/system/profiler_counter.c b/pywfa/WFA2_lib/system/profiler_counter.c index 465d316..52264ff 100644 --- a/pywfa/WFA2_lib/system/profiler_counter.c +++ b/pywfa/WFA2_lib/system/profiler_counter.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple profile counter */ +#include "utils/commons.h" #include "profiler_counter.h" /* diff --git a/pywfa/WFA2_lib/system/profiler_counter.h b/pywfa/WFA2_lib/system/profiler_counter.h index d7ce69d..55b1b98 100644 --- a/pywfa/WFA2_lib/system/profiler_counter.h +++ b/pywfa/WFA2_lib/system/profiler_counter.h @@ -33,7 +33,8 @@ #ifndef PROFILER_COUNTER_H_ #define PROFILER_COUNTER_H_ -#include "utils/commons.h" +#include +#include /* * Counters diff --git a/pywfa/WFA2_lib/system/profiler_timer.c b/pywfa/WFA2_lib/system/profiler_timer.c index cec1fd9..eb25853 100644 --- a/pywfa/WFA2_lib/system/profiler_timer.c +++ b/pywfa/WFA2_lib/system/profiler_timer.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple time profiler */ +#include "utils/commons.h" #include "profiler_timer.h" #ifdef __MACH__ diff --git a/pywfa/WFA2_lib/system/profiler_timer.h b/pywfa/WFA2_lib/system/profiler_timer.h index 52c84e0..d3aee5e 100644 --- a/pywfa/WFA2_lib/system/profiler_timer.h +++ b/pywfa/WFA2_lib/system/profiler_timer.h @@ -33,7 +33,7 @@ #ifndef PROFILER_TIMER_H #define PROFILER_TIMER_H -#include "utils/commons.h" +#include #include "profiler_counter.h" /* diff --git a/pywfa/WFA2_lib/utils/Makefile b/pywfa/WFA2_lib/utils/Makefile index 79d36bf..01665c2 100644 --- a/pywfa/WFA2_lib/utils/Makefile +++ b/pywfa/WFA2_lib/utils/Makefile @@ -12,7 +12,6 @@ MODULES=bitmap \ dna_text \ heatmap \ sequence_buffer \ - string_padded \ vector SRCS=$(addsuffix .c, $(MODULES)) diff --git a/pywfa/WFA2_lib/utils/bitmap.c b/pywfa/WFA2_lib/utils/bitmap.c index b4270b8..1b79133 100644 --- a/pywfa/WFA2_lib/utils/bitmap.c +++ b/pywfa/WFA2_lib/utils/bitmap.c @@ -29,6 +29,7 @@ * DESCRIPTION: Basic bitmap datastructure 
(static) */ +#include "utils/commons.h" #include "utils/bitmap.h" #include "system/mm_allocator.h" @@ -122,6 +123,3 @@ uint64_t bitmap_erank( const uint64_t bitmap_count = POPCOUNT_64(bitmap_masked); return bitmap_block->counter + bitmap_count; } - - - diff --git a/pywfa/WFA2_lib/utils/bitmap.h b/pywfa/WFA2_lib/utils/bitmap.h index 3fc53b0..3db5983 100644 --- a/pywfa/WFA2_lib/utils/bitmap.h +++ b/pywfa/WFA2_lib/utils/bitmap.h @@ -35,7 +35,6 @@ /* * Includes */ -#include "utils/commons.h" #include "system/mm_allocator.h" #define BITMAP_BLOCK_ELEMENTS 64 diff --git a/pywfa/WFA2_lib/utils/commons.h b/pywfa/WFA2_lib/utils/commons.h index 528801b..92a0b5e 100644 --- a/pywfa/WFA2_lib/utils/commons.h +++ b/pywfa/WFA2_lib/utils/commons.h @@ -29,15 +29,14 @@ * DESCRIPTION: Common functions/utilities and headers for C development */ -#ifndef COMMONS_H_ -#define COMMONS_H_ +#pragma once +#include #include #include #include #include -#include #include #include #include @@ -278,5 +277,3 @@ uint64_t nominal_prop_u64(const uint64_t base,const double factor); int i; \ for (i=0;i */ +#include "utils/commons.h" #include "heatmap.h" /* @@ -168,6 +169,3 @@ void heatmap_print( fprintf(stream,"\n"); } } - - - diff --git a/pywfa/WFA2_lib/utils/heatmap.h b/pywfa/WFA2_lib/utils/heatmap.h index ff904c5..fddadd1 100644 --- a/pywfa/WFA2_lib/utils/heatmap.h +++ b/pywfa/WFA2_lib/utils/heatmap.h @@ -31,7 +31,7 @@ #ifndef HEATMAP_H_ #define HEATMAP_H_ -#include "utils/commons.h" +#include /* * Heatmap diff --git a/pywfa/WFA2_lib/utils/sequence_buffer.c b/pywfa/WFA2_lib/utils/sequence_buffer.c index 2b7f750..a7126c6 100644 --- a/pywfa/WFA2_lib/utils/sequence_buffer.c +++ b/pywfa/WFA2_lib/utils/sequence_buffer.c @@ -29,6 +29,7 @@ * DESCRIPTION: Simple linear vector for generic type elements */ +#include "utils/commons.h" #include "utils/sequence_buffer.h" /* @@ -131,4 +132,3 @@ void sequence_buffer_add_pair( sequence_buffer->max_pattern_length = 
MAX(sequence_buffer->max_pattern_length,pattern_length); sequence_buffer->max_text_length = MAX(sequence_buffer->max_text_length,text_length); } - diff --git a/pywfa/WFA2_lib/utils/sequence_buffer.h b/pywfa/WFA2_lib/utils/sequence_buffer.h index 5a35ba6..961196b 100644 --- a/pywfa/WFA2_lib/utils/sequence_buffer.h +++ b/pywfa/WFA2_lib/utils/sequence_buffer.h @@ -31,7 +31,6 @@ #ifndef SEQUENCE_BUFFER_H_ #define SEQUENCE_BUFFER_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" typedef struct { diff --git a/pywfa/WFA2_lib/utils/string_padded.c b/pywfa/WFA2_lib/utils/string_padded.c deleted file mode 100644 index 4222d2f..0000000 --- a/pywfa/WFA2_lib/utils/string_padded.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * The MIT License - * - * Wavefront Alignment Algorithms - * Copyright (c) 2017 by Santiago Marco-Sola - * - * This file is part of Wavefront Alignment Algorithms. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * PROJECT: Wavefront Alignment Algorithms - * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: Padded string module to avoid handling corner conditions - */ - -#include "utils/string_padded.h" -#include "system/mm_allocator.h" - -/* - * Strings (text/pattern) padded - */ -void strings_padded_add_padding( - const char* const buffer, - const int buffer_length, - const int begin_padding_length, - const int end_padding_length, - const char padding_value, - char** const buffer_padded, - char** const buffer_padded_begin, - const bool reverse_sequence, - mm_allocator_t* const mm_allocator) { - // Allocate - const int buffer_padded_length = begin_padding_length + buffer_length + end_padding_length; - *buffer_padded = mm_allocator_malloc(mm_allocator,buffer_padded_length); - // Add begin padding - memset(*buffer_padded,padding_value,begin_padding_length); - // Copy buffer - *buffer_padded_begin = *buffer_padded + begin_padding_length; - if (reverse_sequence) { - int i; - for (i=0;imm_allocator = mm_allocator; - // Compute padding dimensions - const int pattern_begin_padding_length = 0; - const int pattern_end_padding_length = padding_length; - const int text_begin_padding_length = 0; - const int text_end_padding_length = padding_length; - // Add padding - strings_padded_add_padding( - pattern,pattern_length, - pattern_begin_padding_length,pattern_end_padding_length,'?', - &(strings_padded->pattern_padded_buffer), - &(strings_padded->pattern_padded), - reverse_sequences,mm_allocator); - strings_padded_add_padding( - text,text_length, - text_begin_padding_length,text_end_padding_length,'!', - &(strings_padded->text_padded_buffer), - &(strings_padded->text_padded), - reverse_sequences,mm_allocator); - // Return - return strings_padded; -} -strings_padded_t* strings_padded_new_rhomb( - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const int padding_length, - const bool reverse_sequences, - mm_allocator_t* const 
mm_allocator) { - // Allocate - strings_padded_t* const strings_padded = - mm_allocator_alloc(mm_allocator,strings_padded_t); - strings_padded->mm_allocator = mm_allocator; - // Compute padding dimensions - const int pattern_begin_padding_length = text_length + padding_length; - const int pattern_end_padding_length = pattern_length + text_length + padding_length; - const int text_begin_padding_length = padding_length; - const int text_end_padding_length = text_length + padding_length; - // Add padding - strings_padded_add_padding( - pattern,pattern_length, - pattern_begin_padding_length,pattern_end_padding_length,'?', - &(strings_padded->pattern_padded_buffer), - &(strings_padded->pattern_padded), - reverse_sequences,mm_allocator); - strings_padded_add_padding( - text,text_length, - text_begin_padding_length,text_end_padding_length,'!', - &(strings_padded->text_padded_buffer), - &(strings_padded->text_padded), - reverse_sequences,mm_allocator); - // Set lengths - strings_padded->pattern_length = pattern_length; - strings_padded->text_length = text_length; - // Return - return strings_padded; -} -void strings_padded_delete(strings_padded_t* const strings_padded) { - mm_allocator_free(strings_padded->mm_allocator,strings_padded->pattern_padded_buffer); - mm_allocator_free(strings_padded->mm_allocator,strings_padded->text_padded_buffer); - mm_allocator_free(strings_padded->mm_allocator,strings_padded); -} diff --git a/pywfa/WFA2_lib/utils/vector.c b/pywfa/WFA2_lib/utils/vector.c index 5870dac..5a9109f 100644 --- a/pywfa/WFA2_lib/utils/vector.c +++ b/pywfa/WFA2_lib/utils/vector.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple linear vector (generic type elements) */ +#include "utils/commons.h" #include "vector.h" /* @@ -122,4 +123,3 @@ vector_t* vector_dup( memcpy(vector_cpy->memory,vector_src->memory,vector_src->used*vector_src->element_size); return vector_cpy; } - diff --git a/pywfa/WFA2_lib/utils/vector.h b/pywfa/WFA2_lib/utils/vector.h index c632747..6043e22 100644 --- 
a/pywfa/WFA2_lib/utils/vector.h +++ b/pywfa/WFA2_lib/utils/vector.h @@ -33,7 +33,8 @@ #ifndef VECTOR_H_ #define VECTOR_H_ -#include "commons.h" +#include +#include /* * Checkers diff --git a/pywfa/WFA2_lib/wavefront/Makefile b/pywfa/WFA2_lib/wavefront/Makefile index 6fb4ee1..1b372ee 100644 --- a/pywfa/WFA2_lib/wavefront/Makefile +++ b/pywfa/WFA2_lib/wavefront/Makefile @@ -14,6 +14,7 @@ MODULES=wavefront_align \ wavefront_backtrace_offload \ wavefront_backtrace \ wavefront_bialign \ + wavefront_bialigner \ wavefront_components \ wavefront_compute_affine \ wavefront_compute_affine2p \ @@ -23,11 +24,16 @@ MODULES=wavefront_align \ wavefront_debug \ wavefront_display \ wavefront_extend \ + wavefront_extend_kernels_avx \ + wavefront_extend_kernels \ wavefront_heuristic \ wavefront_pcigar \ wavefront_penalties \ + wavefront_sequences \ wavefront_plot \ wavefront_slab \ + wavefront_termination \ + wavefront_unialign \ wavefront SRCS=$(addsuffix .c, $(MODULES)) diff --git a/pywfa/WFA2_lib/wavefront/wavefront.c b/pywfa/WFA2_lib/wavefront/wavefront.c index eb4bfce..97ccd27 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront.c +++ b/pywfa/WFA2_lib/wavefront/wavefront.c @@ -29,6 +29,8 @@ * DESCRIPTION: Individual WaveFront data structure */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront.h" /* @@ -88,14 +90,14 @@ void wavefront_init( wavefront_t* const wavefront, const int min_lo, const int max_hi) { - // Configure limits + // Limits wavefront->null = false; wavefront->lo = 1; wavefront->hi = -1; - wavefront->bt_occupancy_max = 0; - // Setup elements + // Elements wavefront->offsets = wavefront->offsets_mem - min_lo; // Center at k=0 if (wavefront->bt_pcigar_mem) { + wavefront->bt_occupancy_max = 0; wavefront->bt_pcigar = wavefront->bt_pcigar_mem - min_lo; // Center at k=0 wavefront->bt_prev = wavefront->bt_prev_mem - min_lo; // Center at k=0 } @@ -109,14 +111,14 @@ void wavefront_init_null( wavefront_t* const wavefront, const int min_lo, const 
int max_hi) { - // Configure limits + // Limits wavefront->null = true; wavefront->lo = 1; wavefront->hi = -1; - wavefront->bt_occupancy_max = 0; - // Setup elements + // Elements wavefront->offsets = wavefront->offsets_mem - min_lo; // Center at k=0 if (wavefront->bt_pcigar_mem) { + wavefront->bt_occupancy_max = 0; wavefront->bt_pcigar = wavefront->bt_pcigar_mem - min_lo; // Center at k=0 wavefront->bt_prev = wavefront->bt_prev_mem - min_lo; // Center at k=0 } @@ -170,7 +172,3 @@ uint64_t wavefront_get_size( } return total_size; } - - - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront.h b/pywfa/WFA2_lib/wavefront/wavefront.h index c0c920c..67c0483 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront.h +++ b/pywfa/WFA2_lib/wavefront/wavefront.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_H_ #define WAVEFRONT_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" #include "wavefront_offset.h" #include "wavefront_backtrace_buffer.h" @@ -59,15 +58,15 @@ typedef struct { bool null; // Is null interval? 
int lo; // Lowest diagonal (inclusive) int hi; // Highest diagonal (inclusive) - int bt_occupancy_max; // Maximum number of pcigar-ops stored on the Backtrace-block // Wavefront elements wf_offset_t* offsets; // Offsets (k-centered) + wf_offset_t* offsets_mem; // Offsets base memory (Internal) + // Piggyback backtrace + int bt_occupancy_max; // Maximum number of pcigar-ops stored on the Backtrace-block pcigar_t* bt_pcigar; // Backtrace-block pcigar (k-centered) bt_block_idx_t* bt_prev; // Backtrace-block previous-index (k-centered) - // Memory internals - wf_offset_t* offsets_mem; // Offsets base memory - pcigar_t* bt_pcigar_mem; // Backtrace-block (base memory) - bt_block_idx_t* bt_prev_mem; // Backtrace-block previous-index (base memory) + pcigar_t* bt_pcigar_mem; // Backtrace-block (base memory - Internal) + bt_block_idx_t* bt_prev_mem; // Backtrace-block previous-index (base memory - Internal) // Slab internals wavefront_status_type status; // Wavefront status (memory state) int wf_elements_allocated; // Total wf-elements allocated (max. wf. 
size) diff --git a/pywfa/WFA2_lib/wavefront/wavefront_align.c b/pywfa/WFA2_lib/wavefront/wavefront_align.c index 6e5f41d..41b49b6 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_align.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_align.c @@ -29,379 +29,86 @@ * DESCRIPTION: WaveFront alignment module for sequence pairwise alignment */ +#include "utils/commons.h" #include "wavefront_align.h" -#include "wavefront_aligner.h" -#include "wavefront_extend.h" +#include "wavefront_unialign.h" +#include "wavefront_bialign.h" #include "wavefront_compute.h" #include "wavefront_compute_edit.h" #include "wavefront_compute_linear.h" #include "wavefront_compute_affine.h" #include "wavefront_compute_affine2p.h" +#include "wavefront_extend.h" #include "wavefront_backtrace.h" #include "wavefront_debug.h" /* * Checks */ -void wavefront_check_endsfree_form( - wavefront_aligner_t* const wf_aligner, - const int pattern_length, - const int text_length) { - alignment_form_t* const form = &wf_aligner->alignment_form; - if (form->pattern_begin_free > pattern_length || - form->pattern_end_free > pattern_length || - form->text_begin_free > text_length || - form->text_end_free > text_length) { - fprintf(stderr,"[WFA] Ends-free parameters must be not larger than the sequences " - "(P0=%d,Pf=%d,T0=%d,Tf=%d). 
Must be (P0<=|P|,Pf<=|P|,T0<=|T|,Tf<=|T|) where (|P|,|T|)=(%d,%d)\n", - form->pattern_begin_free,form->pattern_end_free, - form->text_begin_free,form->text_end_free, - pattern_length,text_length); - exit(1); - } -} -/* - * Limits - */ -bool wavefront_align_reached_limits( - wavefront_aligner_t* const wf_aligner, - const int score) { - // Check alignment-score limit - if (score >= wf_aligner->system.max_alignment_score) { - wf_aligner->cigar.score = wf_aligner->system.max_alignment_score; - wf_aligner->align_status.status = WF_STATUS_MAX_SCORE_REACHED; - return true; // Stop - } - // Global probing interval - alignment_system_t* const system = &wf_aligner->system; - if ((score%system->probe_interval_global) != 0) return false; // Continue - if (system->verbose) { - wavefront_aligner_print_status(stderr,wf_aligner,score); // DEBUG - } - // BT-Buffer - wavefront_components_t*const wf_components = &wf_aligner->wf_components; - if (wf_components->bt_buffer!=NULL && (score%system->probe_interval_compact)==0) { - uint64_t bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); - // Check BT-buffer memory - if (bt_memory > system->max_memory_compact) { - // Compact BT-buffer - wavefront_components_compact_bt_buffer(wf_components,score,wf_aligner->system.verbose); - // Set new buffer limit - bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); - uint64_t proposed_mem = (double)bt_memory * TELESCOPIC_FACTOR; - if (system->max_memory_compact < proposed_mem && proposed_mem < system->max_memory_abort) { - proposed_mem = system->max_memory_compact; - } - // Reset (if maximum compacts has been performed) - if (wf_components->bt_buffer->num_compactions >= system->max_partial_compacts) { - wf_backtrace_buffer_reset_compaction(wf_components->bt_buffer); - } - } - } - // Check overall memory used - const uint64_t wf_memory_used = wavefront_aligner_get_size(wf_aligner); - if (wf_memory_used > system->max_memory_abort) { - 
wf_aligner->align_status.status = WF_STATUS_OOM; - return true; // Stop - } - // Otherwise continue - return false; -} -/* - * Initialize alignment - */ -void wavefront_align_end2end_initialize( - wavefront_aligner_t* const wf_aligner) { - // Parameters - wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const int max_score_scope = wf_components->max_score_scope; - const int effective_lo = -(max_score_scope+1); - const int effective_hi = (max_score_scope+1); - // Init wavefronts - switch (wf_aligner->component_begin) { - case affine2p_matrix_M: - wf_components->mwavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->mwavefronts[0]->offsets[0] = 0; - wf_components->mwavefronts[0]->lo = 0; - wf_components->mwavefronts[0]->hi = 0; - if (wf_components->bt_piggyback) { // Store initial BT-piggypack element - wf_components->mwavefronts[0]->bt_pcigar[0] = 0; - wf_components->mwavefronts[0]->bt_prev[0] = - wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); - } - // Nullify unused WFs - if (distance_metric <= gap_linear) return; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_I1: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->i1wavefronts[0]->offsets[0] = 0; - wf_components->i1wavefronts[0]->lo = 0; - wf_components->i1wavefronts[0]->hi = 0; - wf_components->d1wavefronts[0] = NULL; - // Nullify unused WFs - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - break; - case 
affine2p_matrix_I2: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->i2wavefronts[0]->offsets[0] = 0; - wf_components->i2wavefronts[0]->lo = 0; - wf_components->i2wavefronts[0]->hi = 0; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_D1: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->d1wavefronts[0]->offsets[0] = 0; - wf_components->d1wavefronts[0]->lo = 0; - wf_components->d1wavefronts[0]->hi = 0; - // Nullify unused WFs - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_D2: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->d2wavefronts[0]->offsets[0] = 0; - wf_components->d2wavefronts[0]->lo = 0; - wf_components->d2wavefronts[0]->hi = 0; - break; - default: - break; - } -} -void wavefront_align_endsfree_initialize( +void wavefront_align_presets__checks( wavefront_aligner_t* const wf_aligner, const int pattern_length, const int text_length) { - // Check - wavefront_check_endsfree_form(wf_aligner,pattern_length,text_length); // Parameters - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const int text_begin_free = wf_aligner->alignment_form.text_begin_free; - const int pattern_begin_free = wf_aligner->alignment_form.pattern_begin_free; - const int 
max_score_scope = wf_components->max_score_scope; - // Init wavefront zero - const int effective_lo = -pattern_begin_free - (max_score_scope+1); - const int effective_hi = text_begin_free + (max_score_scope+1); - wf_components->mwavefronts[0] = wavefront_slab_allocate( - wf_aligner->wavefront_slab,effective_lo,effective_hi); - wf_components->mwavefronts[0]->offsets[0] = 0; - wf_components->mwavefronts[0]->lo = -pattern_begin_free; - wf_components->mwavefronts[0]->hi = text_begin_free; - // Store initial BT-piggypack element - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); - wf_components->mwavefronts[0]->bt_pcigar[0] = 0; - wf_components->mwavefronts[0]->bt_prev[0] = block_idx; + alignment_form_t* const form = &wf_aligner->alignment_form; + /* + * Configuration presets + */ + if (form->span == alignment_endsfree && wf_aligner->alignment_form.extension) { + // Configure WF-extend mode + form->pattern_begin_free = 0; + form->pattern_end_free = pattern_length; + form->text_begin_free = 0; + form->text_end_free = text_length; } - // Init text begin-free - int h; - for (h=1;h<=text_begin_free;++h) { - const int k = DPMATRIX_DIAGONAL(h,0); - wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(h,0); - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,h); - wf_components->mwavefronts[0]->bt_pcigar[k] = 0; - wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + /* + * Checks + */ + if (wf_aligner->bialigner != NULL) { + const bool ends_free = + form->pattern_begin_free > 0 || + form->pattern_end_free > 0 || + form->text_begin_free > 0 || + form->text_end_free > 0; + if (ends_free) { + fprintf(stderr,"[WFA] BiWFA ends-free has not been tested properly yet (let me know and I'll do it)\n"); + exit(1); } - } - // Init pattern begin-free - int v; - for (v=1;v<=pattern_begin_free;++v) { - const int k = 
DPMATRIX_DIAGONAL(0,v); - wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(0,v); - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,0); - wf_components->mwavefronts[0]->bt_pcigar[k] = 0; - wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + if (wf_aligner->alignment_form.extension) { + fprintf(stderr,"[WFA] BiWFA extension is not implemented yet (let me know and I'll add it)\n"); + exit(1); } } - // Nullify unused WFs - if (distance_metric <= gap_linear) return; - wf_components->d1wavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - if (distance_metric==gap_affine) return; - wf_components->d2wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = NULL; -} -/* - * Terminate alignment (backtrace) - */ -void wavefront_align_terminate( - wavefront_aligner_t* const wf_aligner, - const int score) { - // Parameters const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const int swg_match_score = -(wf_aligner->penalties.match); - // Retrieve alignment - if (wf_aligner->alignment_scope == compute_score) { - cigar_clear(&wf_aligner->cigar); - wf_aligner->cigar.score = (distance_metric <= edit) ? score : - WF_PENALTIES_GET_SW_SCORE(swg_match_score,pattern_length,text_length,score); - } else { - // Parameters - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const int alignment_end_k = wf_aligner->alignment_end_pos.k; - const wf_offset_t alignment_end_offset = wf_aligner->alignment_end_pos.offset; - if (wf_components->bt_piggyback) { - // Fetch wavefront - const bool memory_modular = wf_aligner->wf_components.memory_modular; - const int max_score_scope = wf_aligner->wf_components.max_score_scope; - const int score_mod = (memory_modular) ? 
score % max_score_scope : score; - wavefront_t* const mwavefront = wf_components->mwavefronts[score_mod]; - // Backtrace alignment from buffer (unpacking pcigar) - wavefront_backtrace_pcigar( - wf_aligner,alignment_end_k,alignment_end_offset, - mwavefront->bt_pcigar[alignment_end_k], - mwavefront->bt_prev[alignment_end_k]); - } else { - // Backtrace alignment - if (wf_aligner->penalties.distance_metric <= gap_linear) { - wavefront_backtrace_linear(wf_aligner, - score,alignment_end_k,alignment_end_offset); - } else { - wavefront_backtrace_affine(wf_aligner, - wf_aligner->component_begin,wf_aligner->component_end, - score,alignment_end_k,alignment_end_offset); - } - } - // Set score & finish - wf_aligner->cigar.score = (distance_metric <= edit) ? score : - WF_PENALTIES_GET_SW_SCORE(swg_match_score,pattern_length,text_length,score); + const bool is_heuristic_drop = + (wf_aligner->heuristic.strategy & wf_heuristic_xdrop) || + (wf_aligner->heuristic.strategy & wf_heuristic_zdrop); + if (is_heuristic_drop && (distance_metric==edit || distance_metric==indel)) { + fprintf(stderr,"[WFA] Heuristics drops are not compatible with 'edit'/'indel' distance metrics\n"); + exit(1); } -} -/* - * General Alignment - */ -int wavefront_align_sequences( - wavefront_aligner_t* const wf_aligner) { - // Parameters - wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status; - void (*wf_align_compute)(wavefront_aligner_t* const,const int) = wf_align_status->wf_align_compute; - int (*wf_align_extend)(wavefront_aligner_t* const,const int) = wf_align_status->wf_align_extend; - // Compute wavefronts of increasing score - int score = wf_aligner->align_status.score; - while (true) { - // Exact extend s-wavefront - const int finished = (*wf_align_extend)(wf_aligner,score); - if (finished) { - // DEBUG - // wavefront_aligner_print(stderr,wf_aligner,0,score,7,0); - if (wf_aligner->align_status.status == WF_STATUS_SUCCESSFUL) { - wavefront_align_terminate(wf_aligner,score); - } - 
wf_aligner->align_status.score = score; - return wf_aligner->align_status.status; - } - // Compute (s+1)-wavefront - ++score; - (*wf_align_compute)(wf_aligner,score); - // Probe limits - if (wavefront_align_reached_limits(wf_aligner,score)) { - wf_aligner->align_status.score = score; - return wf_aligner->align_status.status; - } - // PROFILE - if (wf_aligner->plot_params.plot_enabled) { - wavefront_plot(wf_aligner,wf_aligner->pattern,wf_aligner->text,score); + if (form->span == alignment_endsfree) { + if (form->pattern_begin_free > pattern_length || + form->pattern_end_free > pattern_length || + form->text_begin_free > text_length || + form->text_end_free > text_length) { + fprintf(stderr,"[WFA] Ends-free parameters must be not larger than the sequences " + "(P0=%d,Pf=%d,T0=%d,Tf=%d). Must be (P0<=|P|,Pf<=|P|,T0<=|T|,Tf<=|T|) where (|P|,|T|)=(%d,%d)\n", + form->pattern_begin_free,form->pattern_end_free, + form->text_begin_free,form->text_end_free, + pattern_length,text_length); + exit(1); } - // DEBUG - // wavefront_aligner_print(stderr,wf_aligner,0,score,7,0); } - // Return OK - wf_aligner->align_status.score = score; - wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; - return WF_STATUS_SUCCESSFUL; } /* - * Wavefront Alignment Begin/End + * Wavefront Alignment Unidirectional */ -void wavefront_align_sequences_init( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length) { - // Parameters - wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status; - // Resize wavefront aligner - wavefront_aligner_resize(wf_aligner,pattern,pattern_length,text,text_length,false); - // Configure WF-compute function - switch (wf_aligner->penalties.distance_metric) { - case indel: - case edit: - wf_align_status->wf_align_compute = &wavefront_compute_edit; - break; - case gap_linear: - wf_align_status->wf_align_compute = &wavefront_compute_linear; - break; - case 
gap_affine: - wf_align_status->wf_align_compute = &wavefront_compute_affine; - break; - case gap_affine_2p: - wf_align_status->wf_align_compute = &wavefront_compute_affine2p; - break; - default: - fprintf(stderr,"[WFA] Distance function not implemented\n"); - exit(1); - break; - } - // Configure WF-extend function - const bool end2end = (wf_aligner->alignment_form.span == alignment_end2end); - if (wf_aligner->match_funct != NULL) { - wf_align_status->wf_align_extend = &wavefront_extend_custom; - } else if (end2end) { - wf_align_status->wf_align_extend = &wavefront_extend_end2end; - } else { - wf_align_status->wf_align_extend = &wavefront_extend_endsfree; - } - // Initialize wavefront - wf_aligner->alignment_end_pos.score = -1; // Not aligned - wf_aligner->alignment_end_pos.k = DPMATRIX_DIAGONAL_NULL; - if (end2end) { - wavefront_align_end2end_initialize(wf_aligner); - } else { - wavefront_align_endsfree_initialize(wf_aligner,pattern_length,text_length); - } - // Plot WF-0 - const bool plot = wf_aligner->plot_params.plot_enabled; - if (plot) { - wavefront_plot(wf_aligner,pattern,text,0); - } -} -void wavefront_align_start( - wavefront_aligner_t* const wf_aligner) { - // DEBUG - wavefront_debug_prologue(wf_aligner); -} -void wavefront_align_finish( +void wavefront_align_unidirectional_cleanup( wavefront_aligner_t* const wf_aligner) { // Compute memory used uint64_t memory_used = wavefront_aligner_get_size(wf_aligner); wf_aligner->align_status.memory_used = memory_used; - // DEBUG - wavefront_debug_epilogue(wf_aligner); // Reap memory (controlled reaping) if (memory_used > wf_aligner->system.max_memory_resident) { // Wavefront components @@ -412,51 +119,95 @@ void wavefront_align_finish( // Slab if (memory_used > wf_aligner->system.max_memory_resident) { wavefront_slab_reap(wf_aligner->wavefront_slab); - if (wf_aligner->aligner_forward != NULL) { - wavefront_slab_reap(wf_aligner->aligner_forward->wavefront_slab); - } - if (wf_aligner->aligner_reverse != NULL) { - 
wavefront_slab_reap(wf_aligner->aligner_reverse->wavefront_slab); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_reap(wf_aligner->bialigner); } } } } +void wavefront_align_unidirectional( + wavefront_aligner_t* const wf_aligner) { + // Wavefront align sequences + wavefront_unialign_init(wf_aligner,affine2p_matrix_M,affine2p_matrix_M); // Init + wavefront_unialign(wf_aligner); // Align + // Finish + if (wf_aligner->align_status.status == WF_STATUS_MAX_STEPS_REACHED) return; // Alignment paused + wavefront_align_unidirectional_cleanup(wf_aligner); +} /* - * Wavefront Alignment + * Wavefront Alignment Bidirectional */ -void wavefront_align_unidirectional( +void wavefront_align_bidirectional( + wavefront_aligner_t* const wf_aligner) { + // Bidirectional alignment + wavefront_bialign(wf_aligner); // Align + // Finish + wf_aligner->align_status.memory_used = wavefront_aligner_get_size(wf_aligner); +} +/* + * Wavefront Alignment Dispatcher + */ +int wavefront_align_lambda( wavefront_aligner_t* const wf_aligner, - const char* const pattern, + alignment_match_funct_t match_funct, + void* match_funct_arguments, const int pattern_length, - const char* const text, const int text_length) { - // Prepare alignment - wavefront_align_sequences_init(wf_aligner,pattern,pattern_length,text,text_length); - // Wavefront align sequences - wavefront_align_sequences(wf_aligner); + // Checks + wavefront_align_presets__checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); + // Plot + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); + // Dispatcher + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_lambda(&wf_aligner->sequences, + match_funct,match_funct_arguments, + pattern_length,text_length,false); + wavefront_align_unidirectional(wf_aligner); + } else { + // Prepare Sequences + wavefront_bialigner_set_sequences_lambda(wf_aligner->bialigner, + 
match_funct,match_funct_arguments, + pattern_length,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); + } + // DEBUG + wavefront_debug_end(wf_aligner); + wavefront_debug_check_correct(wf_aligner); + // Return + return wf_aligner->align_status.status; } -void wavefront_align_bidirectional( +int wavefront_align_packed2bits( wavefront_aligner_t* const wf_aligner, - const char* const pattern, + const uint8_t* const pattern, const int pattern_length, - const char* const text, + const uint8_t* const text, const int text_length) { - // Parameters - wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status; - // Allocate cigar - cigar_t cigar; - cigar_allocate(&cigar,2*(pattern_length+text_length),wf_aligner->mm_allocator); - // Bidirectional alignment - wavefront_bialign(wf_aligner, - pattern,pattern_length,text,text_length, - &wf_aligner->alignment_form, - affine_matrix_M,affine_matrix_M, - INT_MAX,&cigar,0); - // Swap and free cigar - SWAP(wf_aligner->cigar,cigar); - cigar_free(&cigar); - // Finish - wf_align_status->status = WF_STATUS_SUCCESSFUL; // For the moment, all good + // Checks + wavefront_align_presets__checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); + // Plot + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); + // Dispatcher + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_packed2bits(&wf_aligner->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_align_unidirectional(wf_aligner); + } else { + // Prepare Sequences + wavefront_bialigner_set_sequences_packed2bits(wf_aligner->bialigner, + pattern,pattern_length,text,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); + } + // DEBUG + wavefront_debug_end(wf_aligner); + wavefront_debug_check_correct(wf_aligner); + // Return + return wf_aligner->align_status.status; } int wavefront_align( wavefront_aligner_t* const 
wf_aligner, @@ -464,43 +215,52 @@ int wavefront_align( const int pattern_length, const char* const text, const int text_length) { - // Parameters - wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status; - // Start alignment - wavefront_align_start(wf_aligner); + // Checks + wavefront_align_presets__checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); + // Plot + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); // Dispatcher - if (wf_aligner->bidirectional_alignment) { - wavefront_align_bidirectional(wf_aligner,pattern,pattern_length,text,text_length); + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_ascii(&wf_aligner->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_align_unidirectional(wf_aligner); } else { - wavefront_align_unidirectional(wf_aligner,pattern,pattern_length,text,text_length); - // Check pause condition - if (wf_align_status->status == WF_STATUS_MAX_SCORE_REACHED) { - return WF_STATUS_MAX_SCORE_REACHED; // Alignment paused - } + // Prepare Sequences + wavefront_bialigner_set_sequences_ascii(wf_aligner->bialigner, + pattern,pattern_length,text,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); } - // Finish alignment - wavefront_align_finish(wf_aligner); + // DEBUG + wavefront_debug_end(wf_aligner); + wavefront_debug_check_correct(wf_aligner); // Return - return wf_align_status->status; + return wf_aligner->align_status.status; } +/* + * Wavefront Alignment Resume (Experimental) + */ int wavefront_align_resume( wavefront_aligner_t* const wf_aligner) { // Parameters - wavefront_align_status_t* const wf_align_status = &wf_aligner->align_status; + wavefront_align_status_t* const align_status = &wf_aligner->align_status; // Check current alignment status - if (wf_align_status->status != WF_STATUS_MAX_SCORE_REACHED) { - fprintf(stderr,"[WFA] Alignment cannot be resumed 
(already finished)\n"); + if (align_status->status != WF_STATUS_MAX_STEPS_REACHED || + wf_aligner->bialigner != NULL) { + fprintf(stderr,"[WFA] Alignment cannot be resumed\n"); exit(1); } // Resume aligning sequences - wavefront_align_sequences(wf_aligner); - // Check pause condition - if (wf_align_status->status == WF_STATUS_MAX_SCORE_REACHED) { - return WF_STATUS_MAX_SCORE_REACHED; // Alignment paused - } + wavefront_unialign(wf_aligner); // Finish alignment - wavefront_align_finish(wf_aligner); + if (align_status->status == WF_STATUS_MAX_STEPS_REACHED) { + return WF_STATUS_MAX_STEPS_REACHED; // Alignment paused + } + wavefront_align_unidirectional_cleanup(wf_aligner); + // DEBUG + wavefront_debug_check_correct(wf_aligner); // Return - return wf_align_status->status; + return align_status->status; } - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_align.h b/pywfa/WFA2_lib/wavefront/wavefront_align.h index 40cddf4..85180ad 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_align.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_align.h @@ -29,21 +29,9 @@ * DESCRIPTION: WaveFront alignment module for sequence pairwise alignment */ -#ifndef WAVEFRONT_ALIGN_H_ -#define WAVEFRONT_ALIGN_H_ +#pragma once #include "wavefront_aligner.h" -#include "wavefront_display.h" // For convenience - -/* - * Initialize alignment - */ -void wavefront_align_end2end_initialize( - wavefront_aligner_t* const wf_aligner); -void wavefront_align_endsfree_initialize( - wavefront_aligner_t* const wf_aligner, - const int pattern_length, - const int text_length); /* * Wavefront Alignment @@ -54,7 +42,15 @@ int wavefront_align( const int pattern_length, const char* const text, const int text_length); -int wavefront_align_resume( - wavefront_aligner_t* const wf_aligner); - -#endif /* WAVEFRONT_ALIGN_H_ */ +int wavefront_align_lambda( + wavefront_aligner_t* const wf_aligner, + alignment_match_funct_t const match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +int 
wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); diff --git a/pywfa/WFA2_lib/wavefront/wavefront_aligner.c b/pywfa/WFA2_lib/wavefront/wavefront_aligner.c index 0c0018d..f32ed26 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_aligner.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_aligner.c @@ -29,70 +29,162 @@ * DESCRIPTION: WaveFront aligner data structure */ +#include "utils/commons.h" #include "wavefront_aligner.h" #include "wavefront_components.h" #include "wavefront_heuristic.h" #include "wavefront_plot.h" +#include "wavefront_compute.h" +#include "wavefront_sequences.h" /* * Configuration */ #define PATTERN_LENGTH_INIT 1000 #define TEXT_LENGTH_INIT 1000 -#define SEQUENCES_PADDING 10 /* * Error messages */ -char* wf_error_msg[] = -{ - /* WF_STATUS_OOM == -3 */ "[WFA] Alignment failed. Maximum memory threshold reached", - /* WF_STATUS_MAX_SCORE_REACHED == -2 */ "[WFA] Alignment failed. Maximum score reached", - /* WF_STATUS_HEURISTICALY_DROPPED == -1 */ "[WFA] Alignment dropped heuristically", - /* WF_STATUS_SUCCESSFUL == 0 */ "[WFA] Alignment successful", - /* WF_STATUS_IN_PROGRESS == 1 */ "[WFA] Alignment in progress", -}; -char* wavefront_align_strerror(const int wf_error_code) { - return wf_error_msg[wf_error_code+3]; +// OK +#define WF_STATUS_ALG_COMPLETED_MSG "[WFA] Alignment completed successfully" +#define WF_STATUS_ALG_PARTIAL_MSG "[WFA] Alignment extension computed (partial alignment)" +#define WF_STATUS_ALG_COMPLETED_MSG_SHORT "OK.Full" +#define WF_STATUS_ALG_PARTIAL_MSG_SHORT "OK.Partial" +// FAILED +#define WF_STATUS_MAX_STEPS_REACHED_MSG "[WFA] Alignment failed. Maximum WFA-steps limit reached" +#define WF_STATUS_OOM_MSG "[WFA] Alignment failed. Maximum memory limit reached" +#define WF_STATUS_UNATTAINABLE_MSG "[WFA] Alignment failed. 
Unattainable under configured heuristics" +#define WF_STATUS_MAX_STEPS_REACHED_MSG_SHORT "FAILED.MaxWFASteps" +#define WF_STATUS_OOM_MSG_SHORT "FAILED.OOM" +#define WF_STATUS_UNATTAINABLE_MSG_SHORT "FAILED.Unattainable" + +// Internal +#define WF_STATUS_END_REACHED_MSG "[WFA] Alignment end reached" +#define WF_STATUS_END_UNREACHABLE_MSG "[WFA] Alignment end unreachable under current configuration (due to heuristics like Z-drop)" +#define WF_STATUS_UNKNOWN_MSG "[WFA] Unknown error code" +#define WF_STATUS_END_REACHED_MSG_SHORT "INTERNAL.Reached" +#define WF_STATUS_END_UNREACHABLE_MSG_SHORT "INTERNAL.Dropped" +#define WF_STATUS_UNKNOWN_MSG_SHORT "Unknown" +/* */ +char* wavefront_align_strerror(const int error_code) { + // OK + if (error_code == WF_STATUS_ALG_COMPLETED) return WF_STATUS_ALG_COMPLETED_MSG; + if (error_code == WF_STATUS_ALG_PARTIAL) return WF_STATUS_ALG_PARTIAL_MSG; + // FAILED + if (error_code == WF_STATUS_MAX_STEPS_REACHED) return WF_STATUS_MAX_STEPS_REACHED_MSG; + if (error_code == WF_STATUS_OOM) return WF_STATUS_OOM_MSG; + if (error_code == WF_STATUS_UNATTAINABLE) return WF_STATUS_UNATTAINABLE_MSG; + // Internal + if (error_code == WF_STATUS_END_REACHED) return WF_STATUS_END_REACHED_MSG; + if (error_code == WF_STATUS_END_UNREACHABLE) return WF_STATUS_END_UNREACHABLE_MSG; + // Unknown + return WF_STATUS_UNKNOWN_MSG; +} +char* wavefront_align_strerror_short(const int error_code) { + // OK + if (error_code == WF_STATUS_ALG_COMPLETED) return WF_STATUS_ALG_COMPLETED_MSG_SHORT; + if (error_code == WF_STATUS_ALG_PARTIAL) return WF_STATUS_ALG_PARTIAL_MSG_SHORT; + // FAILED + if (error_code == WF_STATUS_MAX_STEPS_REACHED) return WF_STATUS_MAX_STEPS_REACHED_MSG_SHORT; + if (error_code == WF_STATUS_OOM) return WF_STATUS_OOM_MSG_SHORT; + if (error_code == WF_STATUS_UNATTAINABLE) return WF_STATUS_UNATTAINABLE_MSG_SHORT; + // Internal + if (error_code == WF_STATUS_END_REACHED) return WF_STATUS_END_REACHED_MSG_SHORT; + if (error_code == WF_STATUS_END_UNREACHABLE) 
return WF_STATUS_END_UNREACHABLE_MSG_SHORT; + // Unknown + return WF_STATUS_UNKNOWN_MSG_SHORT; } /* - * Alignment status + * Initialize Status & System */ -void wavefront_align_status_clear( - wavefront_align_status_t* const wf_align_status) { - wf_align_status->status = WF_STATUS_IN_PROGRESS; - wf_align_status->score = 0; +void wavefront_aligner_init_status( + wavefront_aligner_t* const wf_aligner) { + wf_aligner->align_status.status = WF_STATUS_OK; + wf_aligner->align_status.score = 0; + wf_aligner->align_status.dropped = false; +} +void wavefront_aligner_init_system( + wavefront_aligner_t* const wf_aligner) { + // Reset effective limits + wf_aligner->system.max_memory_compact = BUFFER_SIZE_256M; + wf_aligner->system.max_memory_resident = BUFFER_SIZE_256M + BUFFER_SIZE_256M; + switch (wf_aligner->memory_mode) { + case wavefront_memory_med: + wf_aligner->system.max_partial_compacts = 4; + break; + case wavefront_memory_low: + wf_aligner->system.max_partial_compacts = 1; + break; + default: + break; + } } /* - * Setup + * Initialize Memory + */ +wavefront_aligner_t* wavefront_aligner_init_mm( + mm_allocator_t* mm_allocator, + const bool memory_modular, + const bool bt_piggyback, + const bool bi_alignment) { + // MM + bool mm_allocator_own; + if (mm_allocator == NULL) { + mm_allocator = mm_allocator_new((bi_alignment) ? BUFFER_SIZE_4K : BUFFER_SIZE_4M); + mm_allocator_own = true; + } else { + mm_allocator_own = false; + } + // Handler + wavefront_aligner_t* const wf_aligner = + mm_allocator_alloc(mm_allocator,wavefront_aligner_t); + // Configure MM + wf_aligner->mm_allocator = mm_allocator; + wf_aligner->mm_allocator_own = mm_allocator_own; + // Slab + if (bi_alignment) { + wf_aligner->wavefront_slab = NULL; + } else { + const wf_slab_mode_t slab_mode = (memory_modular) ? 
wf_slab_reuse : wf_slab_tight; + wf_aligner->wavefront_slab = wavefront_slab_new(1000,bt_piggyback,slab_mode,wf_aligner->mm_allocator); + } + // Return + return wf_aligner; +} +/* + * Initialize Penalties */ void wavefront_aligner_init_penalties( wavefront_aligner_t* const wf_aligner, wavefront_aligner_attr_t* const attributes) { switch (attributes->distance_metric) { case indel: - wavefronts_penalties_set_indel(&wf_aligner->penalties); + wavefront_penalties_set_indel(&wf_aligner->penalties); break; case edit: - wavefronts_penalties_set_edit(&wf_aligner->penalties); + wavefront_penalties_set_edit(&wf_aligner->penalties); break; case gap_linear: - wavefronts_penalties_set_linear( + wavefront_penalties_set_linear( &wf_aligner->penalties, &attributes->linear_penalties); break; case gap_affine: - wavefronts_penalties_set_affine( + wavefront_penalties_set_affine( &wf_aligner->penalties, &attributes->affine_penalties); break; case gap_affine_2p: - wavefronts_penalties_set_affine2p( + wavefront_penalties_set_affine2p( &wf_aligner->penalties, &attributes->affine2p_penalties); break; } } +/* + * Initialize Heuristics + */ void wavefront_aligner_init_heuristic( wavefront_aligner_t* const wf_aligner, wavefront_aligner_attr_t* const attributes) { @@ -101,32 +193,49 @@ void wavefront_aligner_init_heuristic( // Select and configure heuristics if (wf_heuristic->strategy == wf_heuristic_none) { wavefront_heuristic_set_none(&wf_aligner->heuristic); - } else if (wf_heuristic->strategy == wf_heuristic_banded_static) { - wavefront_heuristic_set_banded_static(&wf_aligner->heuristic, - wf_heuristic->min_k,wf_heuristic->max_k); - } else if (wf_heuristic->strategy == wf_heuristic_banded_adaptive) { - wavefront_heuristic_set_banded_adaptive(&wf_aligner->heuristic, - wf_heuristic->min_k,wf_heuristic->max_k,wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_wfadaptive) { - wavefront_heuristic_set_wfadaptive( - 
&wf_aligner->heuristic,wf_heuristic->min_wavefront_length, - wf_heuristic->max_distance_threshold,wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_xdrop) { - wavefront_heuristic_set_xdrop(&wf_aligner->heuristic, - wf_heuristic->xdrop,wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_zdrop) { - wavefront_heuristic_set_zdrop(&wf_aligner->heuristic, - wf_heuristic->zdrop,wf_heuristic->steps_between_cutoffs); + } else { + // Reset + wf_aligner->heuristic.strategy = 0; + // WF-Adaptive + if (wf_heuristic->strategy & wf_heuristic_wfadaptive) { + wavefront_heuristic_set_wfadaptive( + &wf_aligner->heuristic,wf_heuristic->min_wavefront_length, + wf_heuristic->max_distance_threshold,wf_heuristic->steps_between_cutoffs); + } else if (wf_heuristic->strategy & wf_heuristic_wfmash) { + wavefront_heuristic_set_wfmash( + &wf_aligner->heuristic,wf_heuristic->min_wavefront_length, + wf_heuristic->max_distance_threshold,wf_heuristic->steps_between_cutoffs); + } + // Drops + if (wf_heuristic->strategy & wf_heuristic_xdrop) { + wavefront_heuristic_set_xdrop(&wf_aligner->heuristic, + wf_heuristic->xdrop,wf_heuristic->steps_between_cutoffs); + } else if (wf_heuristic->strategy & wf_heuristic_zdrop) { + wavefront_heuristic_set_zdrop(&wf_aligner->heuristic, + wf_heuristic->zdrop,wf_heuristic->steps_between_cutoffs); + } + // Banded + if (wf_heuristic->strategy & wf_heuristic_banded_static) { + wavefront_heuristic_set_banded_static(&wf_aligner->heuristic, + wf_heuristic->min_k,wf_heuristic->max_k); + } else if (wf_heuristic->strategy & wf_heuristic_banded_adaptive) { + wavefront_heuristic_set_banded_adaptive(&wf_aligner->heuristic, + wf_heuristic->min_k,wf_heuristic->max_k,wf_heuristic->steps_between_cutoffs); + } } } +/* + * Initialize Alignment (mode, scope, form) + */ void wavefront_aligner_init_alignment( wavefront_aligner_t* const wf_aligner, wavefront_aligner_attr_t* const attributes, const bool 
memory_modular, - const bool bt_piggyback) { - // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + const bool bt_piggyback, + const bool bi_alignment) { + // Mode + wf_aligner->align_mode = (bi_alignment) ? wf_align_biwfa : wf_align_regular; + wf_aligner->align_mode_tag = NULL; // Score & form wf_aligner->alignment_scope = attributes->alignment_scope; wf_aligner->alignment_form = attributes->alignment_form; @@ -135,200 +244,259 @@ void wavefront_aligner_init_alignment( // Memory mode wf_aligner->memory_mode = attributes->memory_mode; wavefront_aligner_init_heuristic(wf_aligner,attributes); - // Custom matching functions - wf_aligner->match_funct = attributes->match_funct; - wf_aligner->match_funct_arguments = attributes->match_funct_arguments; - // Wavefront components - wavefront_components_allocate( - &wf_aligner->wf_components,pattern_length,text_length, - &wf_aligner->penalties,memory_modular,bt_piggyback, - wf_aligner->mm_allocator); - wf_aligner->component_begin = affine2p_matrix_M; - wf_aligner->component_end = affine2p_matrix_M; - // Wavefront bidirectional - const bool bidirectional_alignment = (attributes->memory_mode == wavefront_memory_ultralow); - wf_aligner->bidirectional_alignment = bidirectional_alignment; - if (bidirectional_alignment) { - // Configure subsidiary aligners - wavefront_aligner_attr_t subsidiary_attr = wavefront_aligner_attr_default; - // Inherit attributes from master aligner - subsidiary_attr.distance_metric = attributes->distance_metric; - subsidiary_attr.linear_penalties = attributes->linear_penalties; - subsidiary_attr.affine_penalties = attributes->affine_penalties; - subsidiary_attr.affine2p_penalties = attributes->affine2p_penalties; - subsidiary_attr.match_funct = attributes->match_funct; - subsidiary_attr.match_funct_arguments = attributes->match_funct_arguments; - // Set specifics for subsidiary aligners - subsidiary_attr.heuristic.strategy = 
wf_heuristic_none; - subsidiary_attr.memory_mode = wavefront_memory_high; - subsidiary_attr.alignment_scope = compute_score; - // Allocate subsidiary aligners - wf_aligner->aligner_forward = wavefront_aligner_new(&subsidiary_attr); - wf_aligner->aligner_reverse = wavefront_aligner_new(&subsidiary_attr); - } else { - wf_aligner->aligner_forward = NULL; - wf_aligner->aligner_reverse = NULL; - } -} -void wavefront_aligner_init_system( - wavefront_aligner_t* const wf_aligner, - alignment_system_t* const system) { - // Copy all parameters - wf_aligner->system = *system; - // Reset effective limits - wf_aligner->system.max_memory_compact = BUFFER_SIZE_256M; - wf_aligner->system.max_memory_resident = BUFFER_SIZE_256M + BUFFER_SIZE_256M; - switch (wf_aligner->memory_mode) { - case wavefront_memory_med: - wf_aligner->system.max_partial_compacts = 4; - break; - case wavefront_memory_low: - wf_aligner->system.max_partial_compacts = 1; - break; - default: - break; - } - // Profile - timer_reset(&wf_aligner->system.timer); } -wavefront_aligner_t* wavefront_aligner_new( - wavefront_aligner_attr_t* attributes) { +/* + * Initialize wavefront-vectors (Initial alignment conditions) + */ +void wavefront_aligner_init_wf_m( + wavefront_aligner_t* const wf_aligner) { // Parameters - const int pattern_length = PATTERN_LENGTH_INIT; - const int text_length = TEXT_LENGTH_INIT; - if (attributes == NULL) attributes = &wavefront_aligner_attr_default; - const bool score_only = (attributes->alignment_scope == compute_score); - const bool memory_modular = score_only || - attributes->memory_mode == wavefront_memory_med || - attributes->memory_mode == wavefront_memory_low; - const bool bt_piggyback = !score_only && - (attributes->memory_mode == wavefront_memory_med || - attributes->memory_mode == wavefront_memory_low); - // MM - mm_allocator_t* mm_allocator = attributes->mm_allocator; - bool mm_allocator_own = false; - if (mm_allocator == NULL) { - mm_allocator = mm_allocator_new(BUFFER_SIZE_4M); - 
mm_allocator_own = true; - } - // Handler - wavefront_aligner_t* const wf_aligner = mm_allocator_alloc(mm_allocator,wavefront_aligner_t); - wf_aligner->mm_allocator = mm_allocator; - wf_aligner->mm_allocator_own = mm_allocator_own; - const wf_slab_mode_t slab_mode = (memory_modular) ? wf_slab_reuse : wf_slab_tight; - wf_aligner->wavefront_slab = wavefront_slab_new(1000,bt_piggyback,slab_mode,mm_allocator); - // Sequences - wf_aligner->pattern_length = pattern_length; - wf_aligner->text_length = text_length; - wf_aligner->sequences = NULL; - // Alignment - wavefront_aligner_init_alignment(wf_aligner,attributes,memory_modular,bt_piggyback); - // CIGAR - if (!score_only) { - cigar_allocate(&wf_aligner->cigar,2*(pattern_length+text_length),mm_allocator); + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + wavefront_penalties_t* const penalties = &wf_aligner->penalties; + alignment_form_t* const form = &wf_aligner->alignment_form; + // Consider ends-free + const int hi = (penalties->match==0) ? form->text_begin_free : 0; + const int lo = (penalties->match==0) ? 
-form->pattern_begin_free : 0; + // Compute dimensions + int effective_lo, effective_hi; + wavefront_compute_limits_output(wf_aligner,lo,hi,&effective_lo,&effective_hi); + // Initialize end2end (wavefront zero) + wf_components->mwavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->mwavefronts[0]->offsets[0] = 0; + wf_components->mwavefronts[0]->lo = lo; + wf_components->mwavefronts[0]->hi = hi; + // Store initial BT-piggypack element + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); + wf_components->mwavefronts[0]->bt_pcigar[0] = 0; + wf_components->mwavefronts[0]->bt_prev[0] = block_idx; } - // Display - wf_aligner->plot_params = attributes->plot_params; - if (attributes->plot_params.plot_enabled) { - wavefront_plot_allocate(&wf_aligner->wf_plot, - wf_aligner->penalties.distance_metric, - pattern_length,text_length, - &wf_aligner->plot_params); + // Initialize ends-free + if (form->span == alignment_endsfree && penalties->match == 0) { + // Text begin-free + const int text_begin_free = form->text_begin_free; + int h; + for (h=1;h<=text_begin_free;++h) { + const int k = DPMATRIX_DIAGONAL(h,0); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(h,0); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,h); + wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } + // Pattern begin-free + const int pattern_begin_free = form->pattern_begin_free; + int v; + for (v=1;v<=pattern_begin_free;++v) { + const int k = DPMATRIX_DIAGONAL(0,v); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(0,v); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,0); + wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + 
wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } } - // System - wavefront_aligner_init_system(wf_aligner,&attributes->system); - // Return - return wf_aligner; + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->d1wavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + if (distance_metric==gap_affine) return; + wf_components->d2wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; } -void wavefront_aligner_resize( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const bool reverse_sequences) { +void wavefront_aligner_init_wf( + wavefront_aligner_t* const wf_aligner) { // Parameters - const bool score_only = (wf_aligner->alignment_scope == compute_score); - // Configure sequences and status - wf_aligner->pattern_length = pattern_length; - wf_aligner->text_length = text_length; - if (wf_aligner->match_funct == NULL) { - if (wf_aligner->sequences != NULL) strings_padded_delete(wf_aligner->sequences); - wf_aligner->sequences = strings_padded_new_rhomb( - pattern,pattern_length,text,text_length, - SEQUENCES_PADDING,reverse_sequences, - wf_aligner->mm_allocator); - wf_aligner->pattern = wf_aligner->sequences->pattern_padded; - wf_aligner->text = wf_aligner->sequences->text_padded; + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + // Init wavefronts + if (wf_aligner->component_begin == affine2p_matrix_M) { + // Initialize + wavefront_aligner_init_wf_m(wf_aligner); + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + if (distance_metric == gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + 
wf_components->d2wavefronts[0] = NULL; } else { - wf_aligner->sequences = NULL; - wf_aligner->pattern = NULL; - wf_aligner->text = NULL; + // Compute dimensions + int effective_lo, effective_hi; // Effective lo/hi + wavefront_compute_limits_output(wf_aligner,0,0,&effective_lo,&effective_hi); + wavefront_t* const wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + // Initialize + switch (wf_aligner->component_begin) { + case affine2p_matrix_I1: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = wavefront; + wf_components->i1wavefronts[0]->offsets[0] = 0; + wf_components->i1wavefronts[0]->lo = 0; + wf_components->i1wavefronts[0]->hi = 0; + wf_components->d1wavefronts[0] = NULL; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_I2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = wavefront; + wf_components->i2wavefronts[0]->offsets[0] = 0; + wf_components->i2wavefronts[0]->lo = 0; + wf_components->i2wavefronts[0]->hi = 0; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D1: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = wavefront; + wf_components->d1wavefronts[0]->offsets[0] = 0; + wf_components->d1wavefronts[0]->lo = 0; + wf_components->d1wavefronts[0]->hi = 0; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = wavefront; + 
wf_components->d2wavefronts[0]->offsets[0] = 0; + wf_components->d2wavefronts[0]->lo = 0; + wf_components->d2wavefronts[0]->hi = 0; + break; + default: + break; + } } - wavefront_align_status_clear(&wf_aligner->align_status); +} +/* + * Initialize Aligner (to perform a new alignment) + */ +void wavefront_aligner_init( + wavefront_aligner_t* const wf_aligner, + const int align_level) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + // Configure status + wavefront_aligner_init_status(wf_aligner); // Heuristics clear wavefront_heuristic_clear(&wf_aligner->heuristic); // Wavefront components wavefront_components_resize(&wf_aligner->wf_components, pattern_length,text_length,&wf_aligner->penalties); // CIGAR - if (!score_only) { - cigar_resize(&wf_aligner->cigar,2*(pattern_length+text_length)); + if (wf_aligner->alignment_scope == compute_alignment) { + cigar_resize(wf_aligner->cigar,2*(pattern_length+text_length)); } // Slab wavefront_slab_clear(wf_aligner->wavefront_slab); - // Display - if (wf_aligner->plot_params.plot_enabled) { - wavefront_plot_free(&wf_aligner->wf_plot); - wavefront_plot_allocate(&wf_aligner->wf_plot, - wf_aligner->penalties.distance_metric, - pattern_length,text_length, - &wf_aligner->plot_params); + // System + wavefront_aligner_init_system(wf_aligner); + // Initialize wavefront + wf_aligner->align_status.num_null_steps = 0; // Zero null steps + wf_aligner->alignment_end_pos.score = -1; // Not aligned + wf_aligner->alignment_end_pos.k = DPMATRIX_DIAGONAL_NULL; + wf_aligner->alignment_end_pos.offset = WAVEFRONT_OFFSET_NULL; + wavefront_aligner_init_wf(wf_aligner); + // Plot (WF_0) + if (wf_aligner->plot != NULL) wavefront_plot(wf_aligner,0,align_level); +} +/* + * Setup + */ +wavefront_aligner_t* wavefront_aligner_new( + wavefront_aligner_attr_t* attributes) { + // Parameters + if (attributes == NULL) 
attributes = &wavefront_aligner_attr_default; + const bool score_only = (attributes->alignment_scope == compute_score); + const bool memory_succint = + attributes->memory_mode == wavefront_memory_med || + attributes->memory_mode == wavefront_memory_low; + const bool memory_modular = score_only || memory_succint; + const bool bt_piggyback = !score_only && memory_succint; + const bool bi_alignment = (attributes->memory_mode == wavefront_memory_ultralow); + // Handler + wavefront_aligner_t* const wf_aligner = wavefront_aligner_init_mm( + attributes->mm_allocator,memory_modular,bt_piggyback,bi_alignment); + // Plot + if (attributes->plot.enabled) { + wf_aligner->plot = wavefront_plot_new(attributes->distance_metric, + PATTERN_LENGTH_INIT,TEXT_LENGTH_INIT,&attributes->plot); + } else { + wf_aligner->plot = NULL; + } + // Alignment + wavefront_aligner_init_alignment(wf_aligner,attributes,memory_modular,bt_piggyback,bi_alignment); + if (bi_alignment) { + wf_aligner->bialigner = wavefront_bialigner_new(attributes,wf_aligner->plot); + } else { + wf_aligner->bialigner = NULL; + // Wavefront components + wavefront_components_allocate( + &wf_aligner->wf_components,PATTERN_LENGTH_INIT,TEXT_LENGTH_INIT, + &wf_aligner->penalties,memory_modular,bt_piggyback, + wf_aligner->mm_allocator); } + // Sequences + wavefront_sequences_allocate(&wf_aligner->sequences); + // CIGAR + const int cigar_length = (score_only) ? 
10 : 2*(PATTERN_LENGTH_INIT+TEXT_LENGTH_INIT); + wf_aligner->cigar = cigar_new(cigar_length); // System - wavefront_aligner_init_system(wf_aligner,&wf_aligner->system); + wf_aligner->system = attributes->system; + // Return + return wf_aligner; } void wavefront_aligner_reap( wavefront_aligner_t* const wf_aligner) { - // Padded sequences - if (wf_aligner->sequences != NULL) { - strings_padded_delete(wf_aligner->sequences); + // Select alignment mode + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_reap(wf_aligner->bialigner); + } else { + // Wavefront components + wavefront_components_reap(&wf_aligner->wf_components); + // Slab + wavefront_slab_reap(wf_aligner->wavefront_slab); } - // Wavefront components - wavefront_components_reap(&wf_aligner->wf_components); - // Subsidiary aligners - if (wf_aligner->aligner_forward != NULL) wavefront_aligner_reap(wf_aligner->aligner_forward); - if (wf_aligner->aligner_reverse != NULL) wavefront_aligner_reap(wf_aligner->aligner_reverse); - // Slab - wavefront_slab_reap(wf_aligner->wavefront_slab); } void wavefront_aligner_delete( wavefront_aligner_t* const wf_aligner) { // Parameters - const bool score_only = (wf_aligner->alignment_scope == compute_score); mm_allocator_t* const mm_allocator = wf_aligner->mm_allocator; - // Padded sequences - if (wf_aligner->sequences != NULL) { - strings_padded_delete(wf_aligner->sequences); + const bool mm_allocator_own = wf_aligner->mm_allocator_own; + // Sequences + wavefront_sequences_free(&wf_aligner->sequences); + // Select alignment mode + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_delete(wf_aligner->bialigner); + } else { + // Wavefront components + wavefront_components_free(&wf_aligner->wf_components); + // Slab + wavefront_slab_delete(wf_aligner->wavefront_slab); } - // Wavefront components - wavefront_components_free(&wf_aligner->wf_components); - // Subsidiary aligners - if (wf_aligner->aligner_forward != NULL) 
wavefront_aligner_delete(wf_aligner->aligner_forward); - if (wf_aligner->aligner_reverse != NULL) wavefront_aligner_delete(wf_aligner->aligner_reverse); // CIGAR - if (!score_only) { - cigar_free(&wf_aligner->cigar); - } - // Slab - wavefront_slab_delete(wf_aligner->wavefront_slab); - // Display - if (wf_aligner->plot_params.plot_enabled) { - wavefront_plot_free(&wf_aligner->wf_plot); + cigar_free(wf_aligner->cigar); + // Plot + if (wf_aligner->plot != NULL && wf_aligner->align_mode <= 1) { + wavefront_plot_delete(wf_aligner->plot); } // MM - const bool mm_allocator_own = wf_aligner->mm_allocator_own; - mm_allocator_free(mm_allocator,wf_aligner); // Handler + mm_allocator_free(mm_allocator,wf_aligner); if (mm_allocator_own) { mm_allocator_delete(mm_allocator); } @@ -339,6 +507,7 @@ void wavefront_aligner_delete( void wavefront_aligner_set_alignment_end_to_end( wavefront_aligner_t* const wf_aligner) { wf_aligner->alignment_form.span = alignment_end2end; + wf_aligner->alignment_form.extension = false; } void wavefront_aligner_set_alignment_free_ends( wavefront_aligner_t* const wf_aligner, @@ -347,67 +516,100 @@ void wavefront_aligner_set_alignment_free_ends( const int text_begin_free, const int text_end_free) { wf_aligner->alignment_form.span = alignment_endsfree; + wf_aligner->alignment_form.extension = false; wf_aligner->alignment_form.pattern_begin_free = pattern_begin_free; wf_aligner->alignment_form.pattern_end_free = pattern_end_free; wf_aligner->alignment_form.text_begin_free = text_begin_free; wf_aligner->alignment_form.text_end_free = text_end_free; } +void wavefront_aligner_set_alignment_extension( + wavefront_aligner_t* const wf_aligner) { + wf_aligner->alignment_form.span = alignment_endsfree; + wf_aligner->alignment_form.extension = true; +} /* * Heuristic configuration */ void wavefront_aligner_set_heuristic_none( wavefront_aligner_t* const wf_aligner) { wavefront_heuristic_set_none(&wf_aligner->heuristic); + if (wf_aligner->bialigner != NULL) { + 
wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } void wavefront_aligner_set_heuristic_banded_static( wavefront_aligner_t* const wf_aligner, const int band_min_k, const int band_max_k) { wavefront_heuristic_set_banded_static(&wf_aligner->heuristic,band_min_k,band_max_k); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } void wavefront_aligner_set_heuristic_banded_adaptive( wavefront_aligner_t* const wf_aligner, const int band_min_k, const int band_max_k, const int score_steps) { - wavefront_heuristic_set_banded_adaptive(&wf_aligner->heuristic,band_min_k,band_max_k,score_steps); + wavefront_heuristic_set_banded_adaptive( + &wf_aligner->heuristic,band_min_k,band_max_k,score_steps); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } void wavefront_aligner_set_heuristic_wfadaptive( wavefront_aligner_t* const wf_aligner, const int min_wavefront_length, const int max_distance_threshold, const int score_steps) { - wavefront_heuristic_set_wfadaptive(&wf_aligner->heuristic,min_wavefront_length,max_distance_threshold,score_steps); + wavefront_heuristic_set_wfadaptive( + &wf_aligner->heuristic, + min_wavefront_length,max_distance_threshold,score_steps); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } +} +void wavefront_aligner_set_heuristic_wfmash( + wavefront_aligner_t* const wf_aligner, + const int min_wavefront_length, + const int max_distance_threshold, + const int score_steps) { + wavefront_heuristic_set_wfmash( + &wf_aligner->heuristic, + min_wavefront_length,max_distance_threshold,score_steps); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } void wavefront_aligner_set_heuristic_xdrop( wavefront_aligner_t* const wf_aligner, 
const int xdrop, const int score_steps) { wavefront_heuristic_set_xdrop(&wf_aligner->heuristic,xdrop,score_steps); + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } void wavefront_aligner_set_heuristic_zdrop( wavefront_aligner_t* const wf_aligner, const int ydrop, const int score_steps) { wavefront_heuristic_set_zdrop(&wf_aligner->heuristic,ydrop,score_steps); -} -/* - * Match-funct configuration - */ -void wavefront_aligner_set_match_funct( - wavefront_aligner_t* const wf_aligner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments) { - wf_aligner->match_funct = match_funct; - wf_aligner->match_funct_arguments = match_funct_arguments; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); + } } /* * System configuration */ -void wavefront_aligner_set_max_alignment_score( +void wavefront_aligner_set_max_alignment_steps( wavefront_aligner_t* const wf_aligner, - const int max_alignment_score) { - wf_aligner->system.max_alignment_score = max_alignment_score; + const int max_alignment_steps) { + wf_aligner->system.max_alignment_steps = max_alignment_steps; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_max_alignment_steps( + wf_aligner->bialigner,max_alignment_steps); + } } void wavefront_aligner_set_max_memory( wavefront_aligner_t* const wf_aligner, @@ -415,6 +617,28 @@ void wavefront_aligner_set_max_memory( const uint64_t max_memory_abort) { wf_aligner->system.max_memory_resident = max_memory_resident; wf_aligner->system.max_memory_abort = max_memory_abort; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_max_memory( + wf_aligner->bialigner,max_memory_resident,max_memory_abort); + } +} +void wavefront_aligner_set_max_num_threads( + wavefront_aligner_t* const wf_aligner, + const int max_num_threads) { + wf_aligner->system.max_num_threads = max_num_threads; + if 
(wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_max_num_threads( + wf_aligner->bialigner,max_num_threads); + } +} +void wavefront_aligner_set_min_offsets_per_thread( + wavefront_aligner_t* const wf_aligner, + const int min_offsets_per_thread) { + wf_aligner->system.min_offsets_per_thread = min_offsets_per_thread; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_min_offsets_per_thread( + wf_aligner->bialigner,min_offsets_per_thread); + } } /* * Utils @@ -423,59 +647,88 @@ uint64_t wavefront_aligner_get_size( wavefront_aligner_t* const wf_aligner) { // Parameters wavefront_components_t* const wf_components = &wf_aligner->wf_components; + // Bialigner uint64_t sub_aligners = 0; - if (wf_aligner->aligner_forward != NULL) { - sub_aligners += wavefront_aligner_get_size(wf_aligner->aligner_forward); + if (wf_aligner->bialigner != NULL) { + return wavefront_bialigner_get_size(wf_aligner->bialigner); + } else { + // Compute aligner size + const uint64_t bt_buffer_size = (wf_components->bt_buffer) ? 
+ wf_backtrace_buffer_get_size_allocated(wf_components->bt_buffer) : 0; + const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab); + // Return overall size + return sub_aligners + bt_buffer_size + slab_size; } - if (wf_aligner->aligner_reverse != NULL) { - sub_aligners += wavefront_aligner_get_size(wf_aligner->aligner_reverse); +} +bool wavefront_aligner_maxtrim_cigar( + wavefront_aligner_t* const wf_aligner) { + switch (wf_aligner->penalties.distance_metric) { + case gap_linear: + return cigar_maxtrim_gap_linear(wf_aligner->cigar,&wf_aligner->penalties.linear_penalties); + case gap_affine: + return cigar_maxtrim_gap_affine(wf_aligner->cigar,&wf_aligner->penalties.affine_penalties); + case gap_affine_2p: + return cigar_maxtrim_gap_affine2p(wf_aligner->cigar,&wf_aligner->penalties.affine2p_penalties); + default: + return false; // Maxtrim does not apply to edit/indel distances } - // Compute aligner size - const uint64_t bt_buffer_size = (wf_components->bt_buffer) ? - wf_backtrace_buffer_get_size_allocated(wf_components->bt_buffer) : 0; - const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab); - // Return overall size - return sub_aligners + bt_buffer_size + slab_size; } /* * Display */ -void wavefront_aligner_print_status( +void wavefront_aligner_print_mode( FILE* const stream, - wavefront_aligner_t* const wf_aligner, - const int score) { - // Parameters - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - // Approximate progress - const int dist_total = MAX(wf_aligner->text_length,wf_aligner->pattern_length); - int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score; - wavefront_t* wavefront = wf_components->mwavefronts[s]; - if (wavefront==NULL && s>0) { - s = (wf_components->memory_modular) ? 
(score-1)%wf_components->max_score_scope : (score-1); - wavefront = wf_components->mwavefronts[s]; - } - int dist_max = -1, wf_len = -1, k; - if (wavefront!=NULL) { - wf_offset_t* const offsets = wavefront->offsets; - for (k=wavefront->lo;k<=wavefront->hi;++k) { - const int dist = MAX(WAVEFRONT_V(k,offsets[k]),WAVEFRONT_H(k,offsets[k])); - dist_max = MAX(dist_max,dist); - } - wf_len = wavefront->hi-wavefront->lo+1; - } - // Memory used - const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab); - const uint64_t bt_buffer_used = (wf_components->bt_buffer) ? - wf_backtrace_buffer_get_size_used(wf_components->bt_buffer) : 0; - // Print one-line status - fprintf(stream, - "[WFA] SequenceLength=(%d,%d) Score %d (~ %2.3f%% aligned). " - "MemoryUsed(WF-Slab,BT-buffer)=(%lu MB,%lu MB). " - "Wavefronts ~ %2.3f Moffsets\n", - wf_aligner->pattern_length,wf_aligner->text_length, - score,(dist_max>=0) ? (100.0f*(float)dist_max/(float)dist_total) : -1.0f, - CONVERT_B_TO_MB(slab_size),CONVERT_B_TO_MB(bt_buffer_used), - (wf_len>=0) ? (float)wf_len/1000000.0f : -1.0f); + wavefront_aligner_t* const wf_aligner) { + if (wf_aligner->align_mode_tag != NULL) { + fprintf(stream,"%s::",wf_aligner->align_mode_tag); + } + switch (wf_aligner->align_mode) { + case wf_align_biwfa: + fprintf(stream,"BiWFA"); + break; + case wf_align_biwfa_breakpoint_forward: + fprintf(stream,"BiWFA::Forward"); + break; + case wf_align_biwfa_breakpoint_reverse: + fprintf(stream,"BiWFA::Reverse"); + break; + case wf_align_biwfa_subsidiary: + fprintf(stream,"BiWFA::SubWFA"); + break; + default: + fprintf(stream,"WFA"); + break; + } +} +void wavefront_aligner_print_scope( + FILE* const stream, + wavefront_aligner_t* const wf_aligner) { + const char* const scope_label = + (wf_aligner->alignment_scope == compute_score) ? 
"score" : "alignment"; + if (wf_aligner->alignment_form.span == alignment_end2end) { + fprintf(stream,"(%s,end2end)",scope_label); + } else { + fprintf(stream,"(%s,endsfree,%d,%d,%d,%d)", + scope_label, + wf_aligner->alignment_form.pattern_begin_free, + wf_aligner->alignment_form.pattern_end_free, + wf_aligner->alignment_form.text_begin_free, + wf_aligner->alignment_form.text_end_free); + } +} +void wavefront_aligner_print_conf( + FILE* const stream, + wavefront_aligner_t* const wf_aligner) { + fprintf(stream,"("); + switch (wf_aligner->memory_mode) { + case wavefront_memory_high: fprintf(stream,"MHigh"); break; + case wavefront_memory_med: fprintf(stream,"MMed"); break; + case wavefront_memory_low: fprintf(stream,"MLow"); break; + case wavefront_memory_ultralow: fprintf(stream,"BiWFA"); break; + } + if (wf_aligner->system.max_alignment_steps == INT_MAX) { + fprintf(stream,",inf)"); + } else { + fprintf(stream,",%d)",wf_aligner->system.max_alignment_steps); + } } - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_aligner.h b/pywfa/WFA2_lib/wavefront/wavefront_aligner.h index 0dd80d7..00e30a4 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_aligner.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_aligner.h @@ -29,174 +29,21 @@ * DESCRIPTION: WaveFront aligner data structure */ -#ifndef WAVEFRONT_ALIGNER_H_ -#define WAVEFRONT_ALIGNER_H_ +#pragma once -#include "utils/commons.h" #include "utils/heatmap.h" -#include "utils/string_padded.h" #include "system/profiler_counter.h" #include "system/profiler_timer.h" -#include "system/mm_allocator.h" #include "system/mm_stack.h" #include "alignment/cigar.h" -#include "wavefront_slab.h" -#include "wavefront_penalties.h" -#include "wavefront_attributes.h" -#include "wavefront_components.h" -#include "wavefront_align.h" -#include "wavefront_bialign.h" +#include "wfa.h" /* - * Error codes & messages + * Initialize Aligner (to perform a new alignment) */ -#define WF_STATUS_SUCCESSFUL 0 -#define WF_STATUS_IN_PROGRESS 1 -#define 
WF_STATUS_HEURISTICALY_DROPPED -1 -#define WF_STATUS_MAX_SCORE_REACHED -2 -#define WF_STATUS_OOM -3 -extern char* wf_error_msg[5]; -char* wavefront_align_strerror(const int wf_error_code); - -/* - * Alignment status - */ -typedef struct _wavefront_aligner_t wavefront_aligner_t; -typedef struct { - // Status - int status; // Status code - int score; // Current WF-alignment score - uint64_t memory_used; // Total memory used - // Wavefront alignment functions - void (*wf_align_compute)(wavefront_aligner_t* const,const int); // WF Compute function - int (*wf_align_extend)(wavefront_aligner_t* const,const int); // WF Extend function -} wavefront_align_status_t; - -/* - * Wavefront Aligner - */ -typedef struct _wavefront_aligner_t { - // Status - wavefront_align_status_t align_status; // Current alignment status - // Sequences - strings_padded_t* sequences; // Padded sequences - char* pattern; // Pattern sequence (padded) - int pattern_length; // Pattern length - char* text; // Text sequence (padded) - int text_length; // Text length - // Sequences (reversed) - strings_padded_t* sequences_rev; // Padded sequences reversed - char* pattern_rev; // Pattern sequence (padded & reversed) - int pattern_length_rev; // Pattern length reversed - char* text_rev; // Text sequence (padded & reversed) - int text_length_rev; // Text length reversed - // Alignment Attributes - alignment_scope_t alignment_scope; // Alignment scope (score only or full-CIGAR) - alignment_form_t alignment_form; // Alignment form (end-to-end/ends-free) - wavefronts_penalties_t penalties; // Alignment penalties - wavefront_heuristic_t heuristic; // Heuristic's parameters - wavefront_memory_t memory_mode; // Wavefront memory strategy (modular wavefronts and piggyback) - // Custom function to compare sequences - alignment_match_funct_t match_funct; // Custom matching function (match(v,h,args)) - void* match_funct_arguments; // Generic arguments passed to matching function (args) - // Wavefront components - 
wavefront_components_t wf_components; // Wavefront components - affine2p_matrix_type component_begin; // Alignment begin component - affine2p_matrix_type component_end; // Alignment end component - wavefront_pos_t alignment_end_pos; // Alignment end position - // Bidirectional Alignment - bool bidirectional_alignment; // Enable bidirectional WFA alignment - wavefront_aligner_t* aligner_forward; // Forward aligner - wavefront_aligner_t* aligner_reverse; // Reverse aligner - wf_bialign_breakpoint_t bialign_breakpoint; // Breakpoint of two wavefronts (bialigner) - // CIGAR - cigar_t cigar; // Alignment CIGAR - // MM - bool mm_allocator_own; // Ownership of MM-Allocator - mm_allocator_t* mm_allocator; // MM-Allocator - wavefront_slab_t* wavefront_slab; // MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) - // Display - wavefront_plot_params_t plot_params; // Wavefront plot parameters - wavefront_plot_t wf_plot; // Wavefront plot - // System - alignment_system_t system; // System related parameters -} wavefront_aligner_t; - -/* - * Setup - */ -wavefront_aligner_t* wavefront_aligner_new( - wavefront_aligner_attr_t* attributes); -void wavefront_aligner_resize( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const bool reverse_sequences); -void wavefront_aligner_reap( - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_delete( - wavefront_aligner_t* const wf_aligner); - -/* - * Span configuration - */ -void wavefront_aligner_set_alignment_end_to_end( - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_set_alignment_free_ends( +void wavefront_aligner_init( wavefront_aligner_t* const wf_aligner, - const int pattern_begin_free, - const int pattern_end_free, - const int text_begin_free, - const int text_end_free); - -/* - * Heuristic configuration - */ -void wavefront_aligner_set_heuristic_none( - wavefront_aligner_t* const 
wf_aligner); -void wavefront_aligner_set_heuristic_banded_static( - wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k); -void wavefront_aligner_set_heuristic_banded_adaptive( - wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k, - const int score_steps); -void wavefront_aligner_set_heuristic_wfadaptive( - wavefront_aligner_t* const wf_aligner, - const int min_wavefront_length, - const int max_distance_threshold, - const int score_steps); -void wavefront_aligner_set_heuristic_xdrop( - wavefront_aligner_t* const wf_aligner, - const int xdrop, - const int score_steps); -void wavefront_aligner_set_heuristic_zdrop( - wavefront_aligner_t* const wf_aligner, - const int ydrop, - const int score_steps); - -/* - * Match-funct configuration - */ -void wavefront_aligner_set_match_funct( - wavefront_aligner_t* const wf_aligner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments); - -/* - * System configuration - */ -void wavefront_aligner_set_max_alignment_score( - wavefront_aligner_t* const wf_aligner, - const int max_alignment_score); -void wavefront_aligner_set_max_memory( - wavefront_aligner_t* const wf_aligner, - const uint64_t max_memory_resident, - const uint64_t max_memory_abort); + const int align_level); /* * Utils @@ -204,12 +51,20 @@ void wavefront_aligner_set_max_memory( uint64_t wavefront_aligner_get_size( wavefront_aligner_t* const wf_aligner); +bool wavefront_aligner_maxtrim_cigar( + wavefront_aligner_t* const wf_aligner); + /* * Display */ -void wavefront_aligner_print_status( +void wavefront_aligner_print_mode( FILE* const stream, - wavefront_aligner_t* const wf_aligner, - const int current_score); + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_print_scope( + FILE* const stream, + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_print_conf( + FILE* const stream, + wavefront_aligner_t* const wf_aligner); + -#endif /* 
WAVEFRONT_ALIGNER_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_attributes.c b/pywfa/WFA2_lib/wavefront/wavefront_attributes.c index 7f6fbcb..fe64ce6 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_attributes.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_attributes.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront aligner data structure attributes */ +#include "utils/commons.h" #include "wavefront_attributes.h" /* @@ -40,14 +41,12 @@ wavefront_aligner_attr_t wavefront_aligner_attr_default = { .alignment_scope = compute_alignment, .alignment_form = { .span = alignment_end2end, + .extension = false, .pattern_begin_free = 0, .pattern_end_free = 0, .text_begin_free = 0, .text_end_free = 0, }, - // Custom matching functions - .match_funct = NULL, // Use default match-compare function - .match_funct_arguments = NULL, // No arguments // Penalties .linear_penalties = { .match = 0, @@ -80,17 +79,14 @@ wavefront_aligner_attr_t wavefront_aligner_attr_default = { // MM .mm_allocator = NULL, // Use private MM // Display - .plot_params = { - .plot_enabled = false, + .plot = { + .enabled = false, .resolution_points = 2000, - .min_v = -1, - .max_v = -1, - .min_h = -1, - .max_h = -1, + .align_level = 0, }, // System .system = { - .max_alignment_score = INT_MAX, // Unlimited + .max_alignment_steps = INT_MAX, // Unlimited .probe_interval_global = 3000, .probe_interval_compact = 6000, .max_memory_compact = -1, // Automatically set based on memory-mode diff --git a/pywfa/WFA2_lib/wavefront/wavefront_attributes.h b/pywfa/WFA2_lib/wavefront/wavefront_attributes.h index 5352c7c..41c4a6b 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_attributes.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_attributes.h @@ -59,6 +59,8 @@ typedef enum { typedef struct { // Mode alignment_span_t span; // Alignment form (End-to-end/Ends-free) + // Extension + bool extension; // Activate extension-like alignment // Ends-free int pattern_begin_free; // Allow free-gap at the beginning of the pattern int 
pattern_end_free; // Allow free-gap at the end of the pattern @@ -66,32 +68,12 @@ typedef struct { int text_end_free; // Allow free-gap at the end of the text } alignment_form_t; -/* - * Custom extend-match function, e.g.: - * - * typedef struct { - * char* pattern; - * int pattern_length; - * char* text; - * int text_length; - * } match_function_params_t; - * - * int match_function(int v,int h,void* arguments) { - * // Extract parameters - * match_function_params_t* match_arguments = (match_function_params_t*)arguments; - * // Check match - * if (v > match_arguments->pattern_length || h > match_arguments->text_length) return 0; - * return (match_arguments->pattern[v] == match_arguments->text[h]); - * } - */ -typedef int (*alignment_match_funct_t)(int,int,void*); - /* * Alignment system configuration */ typedef struct { // Limits - int max_alignment_score; // Maximum score allowed before quit + int max_alignment_steps; // Maximum WFA-steps allowed before quit // Probing intervals int probe_interval_global; // Score-ticks interval to check any limits int probe_interval_compact; // Score-ticks interval to check BT-buffer compacting @@ -102,9 +84,10 @@ typedef struct { uint64_t max_memory_abort; // Maximum memory allowed to be used before aborting alignment // Verbose // 0 - Quiet - // 1 - Report WFA progress and heavy tasks - // 2 - Report each sequence aligned (brief) - // 3 - Report each sequence aligned (very verbose) + // 1 - Report each sequence aligned (brief) + // 2 - Report each sequence/subsequence aligned (brief) + // 3 - Report WFA progress (heavy tasks) (verbose) + // 4 - Full report of each sequence/subsequence aligned (very verbose) int verbose; // Verbose (regulates messages during alignment) // Debug bool check_alignment_correct; // Verify that the alignment CIGAR output is correct @@ -141,13 +124,10 @@ typedef struct { wavefront_heuristic_t heuristic; // Wavefront heuristic // Memory model wavefront_memory_t memory_mode; // Wavefront memory strategy 
(modular wavefronts and piggyback) - // Custom function to compare sequences - alignment_match_funct_t match_funct; // Custom matching function (match(v,h,args)) - void* match_funct_arguments; // Generic arguments passed to matching function (args) // External MM (instead of allocating one inside) mm_allocator_t* mm_allocator; // MM-Allocator // Display - wavefront_plot_params_t plot_params; // Wavefront plot + wavefront_plot_attr_t plot; // Plot wavefront // System alignment_system_t system; // System related parameters } wavefront_aligner_attr_t; diff --git a/pywfa/WFA2_lib/wavefront/wavefront_backtrace.c b/pywfa/WFA2_lib/wavefront/wavefront_backtrace.c index c01719b..1a80331 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_backtrace.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_backtrace.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront-Alignment module for backtracing alignments */ +#include "utils/commons.h" #include "wavefront_backtrace.h" /* @@ -60,7 +61,7 @@ typedef enum { /* * Backtrace Trace Patch Match/Mismsmatch */ -int64_t wavefronts_backtrace_misms( +int64_t wavefront_backtrace_misms( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -74,7 +75,7 @@ int64_t wavefronts_backtrace_misms( return WAVEFRONT_OFFSET_NULL; } } -void wavefronts_backtrace_matches( +void wavefront_backtrace_matches( wavefront_aligner_t* const wf_aligner, const int k, wf_offset_t offset, @@ -101,7 +102,7 @@ void wavefronts_backtrace_matches( /* * Backtrace Trace Patch Deletion */ -int64_t wavefronts_backtrace_del1_open( +int64_t wavefront_backtrace_del1_open( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -115,7 +116,7 @@ int64_t wavefronts_backtrace_del1_open( return WAVEFRONT_OFFSET_NULL; } } -int64_t wavefronts_backtrace_del2_open( +int64_t wavefront_backtrace_del2_open( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -129,7 +130,7 @@ int64_t wavefronts_backtrace_del2_open( return WAVEFRONT_OFFSET_NULL; } } 
-int64_t wavefronts_backtrace_del1_ext( +int64_t wavefront_backtrace_del1_ext( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -143,7 +144,7 @@ int64_t wavefronts_backtrace_del1_ext( return WAVEFRONT_OFFSET_NULL; } } -int64_t wavefronts_backtrace_del2_ext( +int64_t wavefront_backtrace_del2_ext( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -160,7 +161,7 @@ int64_t wavefronts_backtrace_del2_ext( /* * Backtrace Trace Patch Insertion */ -int64_t wavefronts_backtrace_ins1_open( +int64_t wavefront_backtrace_ins1_open( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -174,7 +175,7 @@ int64_t wavefronts_backtrace_ins1_open( return WAVEFRONT_OFFSET_NULL; } } -int64_t wavefronts_backtrace_ins2_open( +int64_t wavefront_backtrace_ins2_open( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -188,7 +189,7 @@ int64_t wavefronts_backtrace_ins2_open( return WAVEFRONT_OFFSET_NULL; } } -int64_t wavefronts_backtrace_ins1_ext( +int64_t wavefront_backtrace_ins1_ext( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -202,7 +203,7 @@ int64_t wavefronts_backtrace_ins1_ext( return WAVEFRONT_OFFSET_NULL; } } -int64_t wavefronts_backtrace_ins2_ext( +int64_t wavefront_backtrace_ins2_ext( wavefront_aligner_t* const wf_aligner, const int score, const int k) { @@ -225,12 +226,14 @@ void wavefront_backtrace_linear( const int alignment_k, const wf_offset_t alignment_offset) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const wavefronts_penalties_t* const wavefront_penalties = &(wf_aligner->penalties); + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + const wavefront_penalties_t* const 
penalties = &wf_aligner->penalties; + const distance_metric_t distance_metric = penalties->distance_metric; // Prepare cigar - cigar_t* const cigar = &wf_aligner->cigar; + cigar_t* const cigar = wf_aligner->cigar; + cigar_clear(cigar); cigar->end_offset = cigar->max_operations - 1; cigar->begin_offset = cigar->max_operations - 2; cigar->operations[cigar->end_offset] = '\0'; @@ -252,19 +255,21 @@ void wavefront_backtrace_linear( // Trace the alignment back while (v > 0 && h > 0 && score > 0) { // Compute scores - const int mismatch = score - wavefront_penalties->mismatch; - const int gap_open1 = score - wavefront_penalties->gap_opening1; + const int mismatch = score - penalties->mismatch; + const int gap_open1 = score - penalties->gap_opening1; // Compute source offsets const int64_t misms = (distance_metric != indel) ? - wavefronts_backtrace_misms(wf_aligner,mismatch,k) : + wavefront_backtrace_misms(wf_aligner,mismatch,k) : WAVEFRONT_OFFSET_NULL; - const int64_t ins = wavefronts_backtrace_ins1_open(wf_aligner,gap_open1,k); - const int64_t del = wavefronts_backtrace_del1_open(wf_aligner,gap_open1,k); + const int64_t ins = wavefront_backtrace_ins1_open(wf_aligner,gap_open1,k); + const int64_t del = wavefront_backtrace_del1_open(wf_aligner,gap_open1,k); const int64_t max_all = MAX(misms,MAX(ins,del)); + // Check source score + if (max_all < 0) break; // No source // Traceback Matches const int max_offset = BACKTRACE_PIGGYBACK_GET_OFFSET(max_all); const int num_matches = offset - max_offset; - wavefronts_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); + wavefront_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); offset = max_offset; // Update coordinates v = WAVEFRONT_V(k,offset); @@ -298,10 +303,10 @@ void wavefront_backtrace_linear( h = WAVEFRONT_H(k,offset); } // Account for last operations - if (v > 0 && h > 0) { // score == 0 + if (v > 0 && h > 0) { // Account for beginning series of matches const int num_matches = MIN(v,h); - 
wavefronts_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); + wavefront_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); v -= num_matches; h -= num_matches; } @@ -320,12 +325,14 @@ void wavefront_backtrace_affine( const int alignment_k, const wf_offset_t alignment_offset) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const wavefronts_penalties_t* const wavefront_penalties = &(wf_aligner->penalties); + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + const wavefront_penalties_t* const penalties = &wf_aligner->penalties; + const distance_metric_t distance_metric = penalties->distance_metric; // Prepare cigar - cigar_t* const cigar = &wf_aligner->cigar; + cigar_t* const cigar = wf_aligner->cigar; + cigar_clear(cigar); cigar->end_offset = cigar->max_operations - 1; cigar->begin_offset = cigar->max_operations - 2; cigar->operations[cigar->end_offset] = '\0'; @@ -350,30 +357,31 @@ void wavefront_backtrace_affine( // Trace the alignment back while (v > 0 && h > 0 && score > 0) { // Compute scores - const int mismatch = score - wavefront_penalties->mismatch; - const int gap_open1 = score - wavefront_penalties->gap_opening1 - wavefront_penalties->gap_extension1; - const int gap_open2 = score - wavefront_penalties->gap_opening2 - wavefront_penalties->gap_extension2; - const int gap_extend1 = score - wavefront_penalties->gap_extension1; - const int gap_extend2 = score - wavefront_penalties->gap_extension2; + const int mismatch = score - penalties->mismatch; + const int gap_open1 = score - penalties->gap_opening1 - penalties->gap_extension1; + const int gap_open2 = score - penalties->gap_opening2 - penalties->gap_extension2; + const int gap_extend1 = score - 
penalties->gap_extension1; + const int gap_extend2 = score - penalties->gap_extension2; // Compute source offsets int64_t max_all; switch (matrix_type) { case affine2p_matrix_M: { - const int64_t misms = wavefronts_backtrace_misms(wf_aligner,mismatch,k); - const int64_t ins1_open = wavefronts_backtrace_ins1_open(wf_aligner,gap_open1,k); - const int64_t ins1_ext = wavefronts_backtrace_ins1_ext(wf_aligner,gap_extend1,k); + const int64_t misms = wavefront_backtrace_misms(wf_aligner,mismatch,k); + const int64_t ins1_open = wavefront_backtrace_ins1_open(wf_aligner,gap_open1,k); + const int64_t ins1_ext = wavefront_backtrace_ins1_ext(wf_aligner,gap_extend1,k); const int64_t max_ins1 = MAX(ins1_open,ins1_ext); - const int64_t del1_open = wavefronts_backtrace_del1_open(wf_aligner,gap_open1,k); - const int64_t del1_ext = wavefronts_backtrace_del1_ext(wf_aligner,gap_extend1,k); + const int64_t del1_open = wavefront_backtrace_del1_open(wf_aligner,gap_open1,k); + const int64_t del1_ext = wavefront_backtrace_del1_ext(wf_aligner,gap_extend1,k); const int64_t max_del1 = MAX(del1_open,del1_ext); if (distance_metric == gap_affine) { - max_all = MAX(misms,MAX(max_ins1,max_del1)); break; + max_all = MAX(misms,MAX(max_ins1,max_del1)); + break; } - const int64_t ins2_open = wavefronts_backtrace_ins2_open(wf_aligner,gap_open2,k); - const int64_t ins2_ext = wavefronts_backtrace_ins2_ext(wf_aligner,gap_extend2,k); + const int64_t ins2_open = wavefront_backtrace_ins2_open(wf_aligner,gap_open2,k); + const int64_t ins2_ext = wavefront_backtrace_ins2_ext(wf_aligner,gap_extend2,k); const int64_t max_ins2 = MAX(ins2_open,ins2_ext); - const int64_t del2_open = wavefronts_backtrace_del2_open(wf_aligner,gap_open2,k); - const int64_t del2_ext = wavefronts_backtrace_del2_ext(wf_aligner,gap_extend2,k); + const int64_t del2_open = wavefront_backtrace_del2_open(wf_aligner,gap_open2,k); + const int64_t del2_ext = wavefront_backtrace_del2_ext(wf_aligner,gap_extend2,k); const int64_t max_del2 = 
MAX(del2_open,del2_ext); const int64_t max_ins = MAX(max_ins1,max_ins2); const int64_t max_del = MAX(max_del1,max_del2); @@ -381,26 +389,26 @@ void wavefront_backtrace_affine( break; } case affine2p_matrix_I1: { - const int64_t ins1_open = wavefronts_backtrace_ins1_open(wf_aligner,gap_open1,k); - const int64_t ins1_ext = wavefronts_backtrace_ins1_ext(wf_aligner,gap_extend1,k); + const int64_t ins1_open = wavefront_backtrace_ins1_open(wf_aligner,gap_open1,k); + const int64_t ins1_ext = wavefront_backtrace_ins1_ext(wf_aligner,gap_extend1,k); max_all = MAX(ins1_open,ins1_ext); break; } case affine2p_matrix_I2: { - const int64_t ins2_open = wavefronts_backtrace_ins2_open(wf_aligner,gap_open2,k); - const int64_t ins2_ext = wavefronts_backtrace_ins2_ext(wf_aligner,gap_extend2,k); + const int64_t ins2_open = wavefront_backtrace_ins2_open(wf_aligner,gap_open2,k); + const int64_t ins2_ext = wavefront_backtrace_ins2_ext(wf_aligner,gap_extend2,k); max_all = MAX(ins2_open,ins2_ext); break; } case affine2p_matrix_D1: { - const int64_t del1_open = wavefronts_backtrace_del1_open(wf_aligner,gap_open1,k); - const int64_t del1_ext = wavefronts_backtrace_del1_ext(wf_aligner,gap_extend1,k); + const int64_t del1_open = wavefront_backtrace_del1_open(wf_aligner,gap_open1,k); + const int64_t del1_ext = wavefront_backtrace_del1_ext(wf_aligner,gap_extend1,k); max_all = MAX(del1_open,del1_ext); break; } case affine2p_matrix_D2: { - const int64_t del2_open = wavefronts_backtrace_del2_open(wf_aligner,gap_open2,k); - const int64_t del2_ext = wavefronts_backtrace_del2_ext(wf_aligner,gap_extend2,k); + const int64_t del2_open = wavefront_backtrace_del2_open(wf_aligner,gap_open2,k); + const int64_t del2_ext = wavefront_backtrace_del2_ext(wf_aligner,gap_extend2,k); max_all = MAX(del2_open,del2_ext); break; } @@ -409,18 +417,20 @@ void wavefront_backtrace_affine( exit(1); break; } - // Traceback Matches + // Check source score + if (max_all < 0) break; // No source + // Traceback matches if 
(matrix_type == affine2p_matrix_M) { const int max_offset = BACKTRACE_PIGGYBACK_GET_OFFSET(max_all); const int num_matches = offset - max_offset; - wavefronts_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); + wavefront_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); offset = max_offset; // Update coordinates v = WAVEFRONT_V(k,offset); h = WAVEFRONT_H(k,offset); if (v <= 0 || h <= 0) break; } - // Traceback Operation + // Traceback operation const backtrace_type backtrace_type = BACKTRACE_PIGGYBACK_GET_TYPE(max_all); switch (backtrace_type) { case backtrace_M: @@ -494,10 +504,10 @@ void wavefront_backtrace_affine( } // Account for last operations if (matrix_type == affine2p_matrix_M) { - if (v > 0 && h > 0) { // score == 0 + if (v > 0 && h > 0) { // Account for beginning series of matches const int num_matches = MIN(v,h); - wavefronts_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); + wavefront_backtrace_matches(wf_aligner,k,offset,num_matches,cigar); v -= num_matches; h -= num_matches; } @@ -506,10 +516,10 @@ void wavefront_backtrace_affine( while (h > 0) {cigar->operations[(cigar->begin_offset)--] = 'I'; --h;}; } else { // DEBUG - if (v != 0 || h != 0 || score != 0) { + if (v != 0 || h != 0 || (score != 0 && penalties->match == 0)) { fprintf(stderr,"[WFA::Backtrace] I?/D?-Beginning backtrace error\n"); - fprintf(stderr,">%.*s\n",pattern_length,wf_aligner->pattern); - fprintf(stderr,"<%.*s\n",text_length,wf_aligner->text); + fprintf(stderr,">%.*s\n",pattern_length,sequences->pattern); + fprintf(stderr,"<%.*s\n",text_length,sequences->text); exit(-1); } } @@ -544,18 +554,12 @@ void wavefront_backtrace_pcigar( const int end_v = WAVEFRONT_V(alignment_k,alignment_offset); const int end_h = WAVEFRONT_H(alignment_k,alignment_offset); if (wf_aligner->penalties.distance_metric <= gap_linear) { - wf_backtrace_buffer_unpack_cigar_linear(bt_buffer, - wf_aligner->pattern,wf_aligner->pattern_length, - wf_aligner->text,wf_aligner->text_length, - 
wf_aligner->match_funct, - wf_aligner->match_funct_arguments, - begin_v,begin_h,end_v,end_h,&wf_aligner->cigar); + wf_backtrace_buffer_unpack_cigar_linear( + bt_buffer,&wf_aligner->sequences, + begin_v,begin_h,end_v,end_h,wf_aligner->cigar); } else { - wf_backtrace_buffer_unpack_cigar_affine(bt_buffer, - wf_aligner->pattern,wf_aligner->pattern_length, - wf_aligner->text,wf_aligner->text_length, - wf_aligner->match_funct, - wf_aligner->match_funct_arguments, - begin_v,begin_h,end_v,end_h,&wf_aligner->cigar); + wf_backtrace_buffer_unpack_cigar_affine( + bt_buffer,&wf_aligner->sequences, + begin_v,begin_h,end_v,end_h,wf_aligner->cigar); } } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.c b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.c index 22f11c8..ce81e1f 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.c @@ -29,7 +29,9 @@ * DESCRIPTION: WaveFront backtrace buffer to store bactrace-blocks */ +#include "utils/commons.h" #include "wavefront_backtrace_buffer.h" +#include "wavefront_sequences.h" /* * Config @@ -265,21 +267,18 @@ bt_block_t* wf_backtrace_buffer_traceback_pcigar( } void wf_backtrace_buffer_unpack_cigar_linear( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, const int end_h, cigar_t* const cigar) { - // Clear cigar - char* cigar_buffer = cigar->operations; - cigar->begin_offset = 0; + // Parameters + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Add init insertions/deletions + cigar_clear(cigar); + char* cigar_buffer = cigar->operations; int i; int v = begin_v; int h = begin_h; @@ -292,9 +291,7 @@ void 
wf_backtrace_buffer_unpack_cigar_linear( // Unpack block int cigar_block_length = 0; pcigar_unpack_linear( - palignment_blocks[i], - pattern,pattern_length,text,text_length, - match_funct,match_funct_arguments,&v,&h, + palignment_blocks[i],sequences,&v,&h, cigar_buffer,&cigar_block_length); // Update CIGAR cigar_buffer += cigar_block_length; @@ -313,21 +310,18 @@ void wf_backtrace_buffer_unpack_cigar_linear( } void wf_backtrace_buffer_unpack_cigar_affine( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, const int end_h, cigar_t* const cigar) { - // Clear cigar - char* cigar_buffer = cigar->operations; - cigar->begin_offset = 0; + // Parameters + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Add init insertions/deletions + cigar_clear(cigar); + char* cigar_buffer = cigar->operations; int i; int v = begin_v; int h = begin_h; @@ -341,9 +335,7 @@ void wf_backtrace_buffer_unpack_cigar_affine( // Unpack block int cigar_block_length = 0; pcigar_unpack_affine( - palignment_blocks[i], - pattern,pattern_length,text,text_length, - match_funct,match_funct_arguments,&v,&h, + palignment_blocks[i],sequences,&v,&h, cigar_buffer,&cigar_block_length,&current_matrix_type); // Update CIGAR cigar_buffer += cigar_block_length; @@ -398,7 +390,7 @@ void wf_backtrace_buffer_mark_backtrace_batch( while (active_blocks < max_batch_size && next_idx < num_block_idxs) { // Check NULL const bt_block_idx_t block_idx = bt_block_idxs[next_idx]; - if (offsets[next_idx] >= 0 && + if (offsets[next_idx] >= 0 && block_idx >= num_compacted_blocks) { // NOTE block_idx != BT_BLOCK_IDX_NULL // Prefetch (bt-block and bt_block) BITMAP_PREFETCH_BLOCK(bitmap,block_idx); @@ -465,7 
+457,7 @@ void wf_backtrace_buffer_mark_backtrace_batch( bt_block_idx_t wf_backtrace_buffer_compact_marked( wf_backtrace_buffer_t* const bt_buffer, bitmap_t* const bitmap, - const bool verbose) { + const int verbose) { // Parameters const int num_segments = vector_get_used(bt_buffer->segments); bt_block_t** const segments = vector_get_mem(bt_buffer->segments,bt_block_t*); @@ -517,7 +509,7 @@ bt_block_idx_t wf_backtrace_buffer_compact_marked( bt_buffer->block_next = write_block; bt_buffer->num_compactions++; // DEBUG - if (verbose) { + if (verbose >= 3) { fprintf(stderr,"[WFA::BacktraceBuffer] Compacted from %lu MB to %lu MB (%2.2f%%)", CONVERT_B_TO_MB(read_global_pos*sizeof(bt_block_t)), CONVERT_B_TO_MB(write_global_pos*sizeof(bt_block_t)), @@ -526,6 +518,3 @@ bt_block_idx_t wf_backtrace_buffer_compact_marked( // Return last index return write_global_pos - 1; } - - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.h b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.h index bdfaa45..3b2f808 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_buffer.h @@ -33,7 +33,6 @@ #define WAVEFRONT_BACKTRACE_BUFFER_H_ #include "alignment/cigar.h" -#include "utils/commons.h" #include "utils/vector.h" #include "utils/bitmap.h" #include "system/mm_allocator.h" @@ -120,12 +119,7 @@ bt_block_t* wf_backtrace_buffer_traceback_pcigar( bt_block_t* bt_block); void wf_backtrace_buffer_unpack_cigar_linear( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, @@ -133,12 +127,7 @@ void wf_backtrace_buffer_unpack_cigar_linear( cigar_t* const cigar); void wf_backtrace_buffer_unpack_cigar_affine( wf_backtrace_buffer_t* const bt_buffer, - 
const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, @@ -162,7 +151,7 @@ void wf_backtrace_buffer_mark_backtrace_batch( bt_block_idx_t wf_backtrace_buffer_compact_marked( wf_backtrace_buffer_t* const bt_buffer, bitmap_t* const bitmap, - const bool verbose); + const int verbose); /* * Utils diff --git a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_offload.c b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_offload.c index 4109f92..fc8c069 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_backtrace_offload.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_backtrace_offload.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for offloading partial backtraces */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "wfa.h" #include "wavefront_backtrace_offload.h" /* @@ -285,4 +286,3 @@ void wavefront_backtrace_offload_affine( wf_aligner,out_d2,out_d2_bt_pcigar,out_d2_bt_prev,lo,hi); } } - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_bialign.c b/pywfa/WFA2_lib/wavefront/wavefront_bialign.c index d6d2a7a..9d3b2d1 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_bialign.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_bialign.c @@ -28,34 +28,39 @@ * AUTHOR(S): Santiago Marco-Sola */ +#include "utils/commons.h" #include "wavefront_bialign.h" +#include "wavefront_unialign.h" +#include "wavefront_bialigner.h" -#include "wavefront_align.h" -#include "wavefront_extend.h" #include "wavefront_compute.h" -#include "wavefront_compute_edit.h" -#include "wavefront_compute_linear.h" #include "wavefront_compute_affine.h" #include "wavefront_compute_affine2p.h" -#include "wavefront_backtrace.h" +#include "wavefront_compute_edit.h" +#include "wavefront_compute_linear.h" +#include "wavefront_extend.h" +#include "wavefront_plot.h" +#include 
"wavefront_debug.h" /* * Config */ -#define WF_BIALIGN_FALLBACK_MIN_SCORE 100 +#define WF_BIALIGN_FALLBACK_MIN_SCORE 250 +#define WF_BIALIGN_FALLBACK_MIN_LENGTH 100 +#define WF_BIALIGN_RECOVERY_MIN_SCORE 500 /* * Debug */ void wavefront_bialign_debug( wf_bialign_breakpoint_t* const breakpoint, - const int rlevel) { + const int align_level) { // Parameters const int breakpoint_h = WAVEFRONT_H(breakpoint->k_forward,breakpoint->offset_forward); const int breakpoint_v = WAVEFRONT_V(breakpoint->k_forward,breakpoint->offset_forward); // Prinf debug info - fprintf(stderr,"[WFA::BiAlign][Recursion=%d] ",rlevel); - int i; for (i=0;iscore); switch (breakpoint->component) { @@ -69,25 +74,117 @@ void wavefront_bialign_debug( fprintf(stderr,")\n"); } /* - * Utils + * Init */ -int wavefront_bialign_gap_opening_adjustment( - wavefront_aligner_t* const wf_aligner, - const distance_metric_t distance_metric) { +void wavefront_bialign_init( + wavefront_bialigner_t* const bialigner, + const distance_metric_t distance_metric, + alignment_form_t* const form, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end, + const int align_level, + const int verbose) { + // Parameters + wavefront_aligner_t* const wf_forward = bialigner->wf_forward; + wavefront_aligner_t* const wf_reverse = bialigner->wf_reverse; + // Configure WF-compute function switch (distance_metric) { - case gap_affine: - return wf_aligner->penalties.gap_opening1; - case gap_affine_2p: - return MAX(wf_aligner->penalties.gap_opening1,wf_aligner->penalties.gap_opening2); case indel: case edit: + bialigner->wf_align_compute = &wavefront_compute_edit; + break; case gap_linear: + bialigner->wf_align_compute = &wavefront_compute_linear; + break; + case gap_affine: + bialigner->wf_align_compute = &wavefront_compute_affine; + break; + case gap_affine_2p: + bialigner->wf_align_compute = &wavefront_compute_affine2p; + break; default: - return 0; + fprintf(stderr,"[WFA] Distance function not 
implemented\n"); + exit(1); + break; + } + // Initialize wavefront-aligner (forward) + alignment_span_t span_forward = + (form->pattern_begin_free > 0 || form->text_begin_free > 0) ? + alignment_endsfree : alignment_end2end; + alignment_form_t form_forward = { + .span = span_forward, + .pattern_begin_free = form->pattern_begin_free, + .pattern_end_free = 0, + .text_begin_free = form->text_begin_free, + .text_end_free = 0, + }; + wf_forward->alignment_form = form_forward; + wf_forward->component_begin = component_begin; + wf_forward->component_end = component_end; + wavefront_aligner_init(wf_forward,align_level); + // Initialize wavefront-aligner (reverse) + alignment_span_t span_reverse = + (form->pattern_end_free > 0 || form->text_end_free > 0) ? + alignment_endsfree : alignment_end2end; + alignment_form_t form_reverse = { + .span = span_reverse, + .pattern_begin_free = form->pattern_end_free, + .pattern_end_free = 0, + .text_begin_free = form->text_end_free, + .text_end_free = 0, + }; + wf_reverse->alignment_form = form_reverse; + wf_reverse->component_begin = component_end; + wf_reverse->component_end = component_begin; + wavefront_aligner_init(wf_reverse,align_level); + // Plot + const bool plot_enabled = (wf_forward->plot != NULL); + if (plot_enabled) { + wavefront_plot(wf_forward,0,align_level); + wavefront_plot(wf_reverse,0,align_level); + } + // DEBUG + if (verbose >= 2) { + wavefront_debug_begin(wf_forward); + wavefront_debug_begin(wf_reverse); } } /* - * Bialign check breakpoints + * Bidirectional Alignment (base cases) + */ +int wavefront_bialign_base( + wavefront_aligner_t* const wf_aligner, + alignment_form_t* const form, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end, + const int align_level) { + // Parameters + wavefront_aligner_t* const wf_base = wf_aligner->bialigner->wf_base; + const int verbose = wf_base->system.verbose; + // Configure + wf_base->alignment_form = *form; + 
wavefront_unialign_init(wf_base,component_begin,component_end); + // DEBUG + if (verbose >= 2) wavefront_debug_begin(wf_base); + // Wavefront align sequences + wavefront_unialign(wf_base); + // DEBUG + if (verbose >= 2) { + wavefront_debug_end(wf_base); + wavefront_debug_check_correct(wf_base); + } + // Append CIGAR + cigar_append_forward(wf_aligner->cigar,wf_base->cigar); + // Set status and return + const int align_status = wf_base->align_status.status; + if (align_status == WF_STATUS_ALG_COMPLETED) { + return WF_STATUS_OK; + } else { + return WF_STATUS_UNATTAINABLE; + } +} +/* + * Bidirectional check breakpoints */ void wavefront_bialign_breakpoint_indel2indel( wavefront_aligner_t* const wf_aligner, @@ -99,8 +196,9 @@ void wavefront_bialign_breakpoint_indel2indel( const affine2p_matrix_type component, wf_bialign_breakpoint_t* const breakpoint) { // Parameters - const int text_length = wf_aligner->text_length; - const int pattern_length = wf_aligner->pattern_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; const int gap_open = (component==affine2p_matrix_I1 || component==affine2p_matrix_D1) ? 
wf_aligner->penalties.gap_opening1 : wf_aligner->penalties.gap_opening2; @@ -119,11 +217,16 @@ void wavefront_bialign_breakpoint_indel2indel( // Fetch offsets const wf_offset_t doffset_0 = dwf_0->offsets[k_0]; const wf_offset_t doffset_1 = dwf_1->offsets[k_1]; - const int dh_0 = WAVEFRONT_H(k_forward,doffset_0); - const int dh_1 = WAVEFRONT_H(k_reverse,doffset_1); + const int dh_0 = WAVEFRONT_H(k_0,doffset_0); + const int dh_1 = WAVEFRONT_H(k_1,doffset_1); // Check breakpoint d2d if (dh_0 + dh_1 >= text_length && score_0 + score_1 - gap_open < breakpoint->score) { if (breakpoint_forward) { + // Check out-of-bounds coordinates + const int v = WAVEFRONT_V(k_0,dh_0); + const int h = WAVEFRONT_H(k_0,dh_0); + if (v > pattern_length || h > text_length) continue; + // Set breakpoint breakpoint->score_forward = score_0; breakpoint->score_reverse = score_1; breakpoint->k_forward = k_0; @@ -131,6 +234,11 @@ void wavefront_bialign_breakpoint_indel2indel( breakpoint->offset_forward = dh_0; breakpoint->offset_reverse = dh_1; } else { + // Check out-of-bounds coordinates + const int v = WAVEFRONT_V(k_1,dh_1); + const int h = WAVEFRONT_H(k_1,dh_1); + if (v > pattern_length || h > text_length) continue; + // Set breakpoint breakpoint->score_forward = score_1; breakpoint->score_reverse = score_0; breakpoint->k_forward = k_1; @@ -140,6 +248,7 @@ void wavefront_bialign_breakpoint_indel2indel( } breakpoint->score = score_0 + score_1 - gap_open; breakpoint->component = component; + // wavefront_bialign_debug(breakpoint,-1); // DEBUG // No need to keep searching return; } @@ -154,8 +263,9 @@ void wavefront_bialign_breakpoint_m2m( wavefront_t* const mwf_1, wf_bialign_breakpoint_t* const breakpoint) { // Parameters - const int text_length = wf_aligner->text_length; - const int pattern_length = wf_aligner->pattern_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int text_length = sequences->text_length; + const int pattern_length = 
sequences->pattern_length; // Check wavefronts overlapping const int lo_0 = mwf_0->lo; const int hi_0 = mwf_0->hi; @@ -171,8 +281,8 @@ void wavefront_bialign_breakpoint_m2m( // Fetch offsets const wf_offset_t moffset_0 = mwf_0->offsets[k_0]; const wf_offset_t moffset_1 = mwf_1->offsets[k_1]; - const int mh_0 = WAVEFRONT_H(k_forward,moffset_0); - const int mh_1 = WAVEFRONT_H(k_reverse,moffset_1); + const int mh_0 = WAVEFRONT_H(k_0,moffset_0); + const int mh_1 = WAVEFRONT_H(k_1,moffset_1); // Check breakpoint m2m if (mh_0 + mh_1 >= text_length && score_0 + score_1 < breakpoint->score) { if (breakpoint_forward) { @@ -192,13 +302,14 @@ void wavefront_bialign_breakpoint_m2m( } breakpoint->score = score_0 + score_1; breakpoint->component = affine2p_matrix_M; + // wavefront_bialign_debug(breakpoint,-1); // DEBUG // No need to keep searching return; } } } /* - * Bialign find overlaps + * Bidirectional find overlaps */ void wavefront_bialign_overlap( wavefront_aligner_t* const wf_aligner_0, @@ -235,7 +346,7 @@ void wavefront_bialign_overlap( const int score_mod_i = score_i % max_score_scope; // Check I2/D2-breakpoints (gap_affine_2p) if (distance_metric == gap_affine_2p) { - if (score_0 + score_1 - gap_opening2 >= breakpoint->score) continue; + if (score_0 + score_i - gap_opening2 >= breakpoint->score) continue; // Check breakpoint d2d wavefront_t* const d2wf_1 = wf_aligner_1->wf_components.d2wavefronts[score_mod_i]; if (d2wf_0 != NULL && d2wf_1 != NULL) { @@ -253,7 +364,7 @@ void wavefront_bialign_overlap( } // Check I1/D1-breakpoints (gap_affine) if (distance_metric >= gap_affine) { - if (score_0 + score_1 - gap_opening1 >= breakpoint->score) continue; + if (score_0 + score_i - gap_opening1 >= breakpoint->score) continue; // Check breakpoint d2d wavefront_t* const d1wf_1 = wf_aligner_1->wf_components.d1wavefronts[score_mod_i]; if (d1wf_0 != NULL && d1wf_1 != NULL) { @@ -270,7 +381,7 @@ void wavefront_bialign_overlap( } } // Check M-breakpoints (indel, edit, gap-linear) - 
if (score_0 + score_1 >= breakpoint->score) continue; + if (score_0 + score_i >= breakpoint->score) continue; wavefront_t* const mwf_1 = wf_aligner_1->wf_components.mwavefronts[score_mod_i]; if (mwf_1 != NULL) { wavefront_bialign_breakpoint_m2m( @@ -280,157 +391,163 @@ void wavefront_bialign_overlap( } } /* - * Bialign breakpoint detection + * Bidirectional breakpoint detection */ -void wavefront_bialign_find_breakpoint_init( - wavefront_aligner_t* const wf_aligner_forward, - wavefront_aligner_t* const wf_aligner_reverse, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const distance_metric_t distance_metric, - alignment_form_t* const form, - const affine2p_matrix_type component_begin, - const affine2p_matrix_type component_end) { - // Resize wavefront aligner - wavefront_aligner_resize(wf_aligner_forward,pattern,pattern_length,text,text_length,false); - wavefront_aligner_resize(wf_aligner_reverse,pattern,pattern_length,text,text_length,true); - // Configure form forward and reverse - alignment_span_t span_forward = - (form->pattern_begin_free > 0 || form->text_begin_free > 0) ? alignment_endsfree : alignment_end2end; - alignment_form_t form_forward = { - .span = span_forward, - .pattern_begin_free = form->pattern_begin_free, - .pattern_end_free = 0, - .text_begin_free = form->text_begin_free, - .text_end_free = 0, - }; - alignment_span_t span_reverse = - (form->pattern_end_free > 0 || form->text_end_free > 0) ? 
alignment_endsfree : alignment_end2end; - alignment_form_t form_reverse = { - .span = span_reverse, - .pattern_begin_free = form->pattern_end_free, - .pattern_end_free = 0, - .text_begin_free = form->text_end_free, - .text_end_free = 0, - }; - // Configure WF-compute function (global) +int wavefront_bialign_overlap_gopen_adjust( + wavefront_aligner_t* const wf_aligner, + const distance_metric_t distance_metric) { switch (distance_metric) { + case gap_affine: + return wf_aligner->penalties.gap_opening1; + case gap_affine_2p: + return MAX(wf_aligner->penalties.gap_opening1,wf_aligner->penalties.gap_opening2); case indel: case edit: - wf_aligner_forward->align_status.wf_align_compute = &wavefront_compute_edit; - break; case gap_linear: - wf_aligner_forward->align_status.wf_align_compute = &wavefront_compute_linear; - break; - case gap_affine: - wf_aligner_forward->align_status.wf_align_compute = &wavefront_compute_affine; - break; - case gap_affine_2p: - wf_aligner_forward->align_status.wf_align_compute = &wavefront_compute_affine2p; - break; default: - fprintf(stderr,"[WFA] Distance function not implemented\n"); - exit(1); - break; - } - // Initialize wavefront (forward) - wf_aligner_forward->alignment_form = form_forward; - wf_aligner_forward->component_begin = component_begin; - if (span_forward == alignment_end2end) { - wavefront_align_end2end_initialize(wf_aligner_forward); - } else { - wavefront_align_endsfree_initialize(wf_aligner_forward,pattern_length,text_length); - } - // Initialize wavefront (reverse) - wf_aligner_reverse->alignment_form = form_reverse; - wf_aligner_reverse->component_begin = component_end; - if (span_reverse == alignment_end2end) { - wavefront_align_end2end_initialize(wf_aligner_reverse); - } else { - wavefront_align_endsfree_initialize(wf_aligner_reverse,pattern_length,text_length); + return 0; } } -void wavefront_bialign_find_breakpoint( - wavefront_aligner_t* const wf_aligner_forward, - wavefront_aligner_t* const wf_aligner_reverse, - 
const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, +int wavefront_bialign_find_breakpoint( + wavefront_bialigner_t* const bialigner, const distance_metric_t distance_metric, alignment_form_t* const form, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end, - wf_bialign_breakpoint_t* const breakpoint) { + wf_bialign_breakpoint_t* const breakpoint, + const int align_level) { + // Parameters + wavefront_aligner_t* const wf_forward = bialigner->wf_forward; + wavefront_aligner_t* const wf_reverse = bialigner->wf_reverse; + alignment_system_t* const system = &wf_forward->system; + const bool plot_enabled = (wf_forward->plot != NULL); + const int verbose = system->verbose; // Init bialignment - wavefront_bialign_find_breakpoint_init( - wf_aligner_forward,wf_aligner_reverse, - pattern,pattern_length,text,text_length, - distance_metric,form,component_begin,component_end); - // Compute wavefronts of increasing score until both wavefronts overlap - const int max_antidiagonal = DPMATRIX_ANTIDIAGONAL(pattern_length,text_length) - 1; - void (*wf_align_compute)(wavefront_aligner_t* const,const int) = wf_aligner_forward->align_status.wf_align_compute; + wavefront_bialign_init(bialigner,distance_metric,form,component_begin,component_end,align_level,verbose); + // Sequences + wavefront_sequences_t* const sequences = &wf_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; + // Operators + void (*wf_align_compute)(wavefront_aligner_t* const,const int) = bialigner->wf_align_compute; + // Parameters + const int max_alignment_steps = wf_forward->system.max_alignment_steps; + const int max_antidiagonal = DPMATRIX_ANTIDIAGONAL(pattern_length,text_length) - 1; // Note: Even removing -1 + int score_forward = 0, score_reverse = 0, forward_max_ak = 0, reverse_max_ak = 0; + bool reachability_quit; + // Prepare and perform first 
bialignment step breakpoint->score = INT_MAX; - int score_forward = 0, score_reverse = 0; - int forward_max_ak = wavefront_extend_end2end_max(wf_aligner_forward,score_forward); - int reverse_max_ak = wavefront_extend_end2end_max(wf_aligner_reverse,score_reverse); - int max_ak; - bool last_wf_forward; + reachability_quit = wavefront_extend_end2end_max(wf_forward,score_forward,&forward_max_ak); + if (reachability_quit) return wf_forward->align_status.status; + reachability_quit = wavefront_extend_end2end_max(wf_reverse,score_reverse,&reverse_max_ak); + if (reachability_quit) return wf_reverse->align_status.status; + // Compute wavefronts of increasing score until both wavefronts overlap + int max_ak = 0; + bool last_wf_forward = false; while (true) { - // Check if they are close to collision + // Check close-to-collision if (forward_max_ak + reverse_max_ak >= max_antidiagonal) break; - // Compute-next & extend wavefront forward + /* + * Compute next wavefront (Forward) + */ ++score_forward; - (*wf_align_compute)(wf_aligner_forward,score_forward); - max_ak = wavefront_extend_end2end_max(wf_aligner_forward,score_forward); + (*wf_align_compute)(wf_forward,score_forward); + if (plot_enabled) wavefront_plot(wf_forward,score_forward,align_level); // Plot + // Extend + reachability_quit = wavefront_extend_end2end_max(wf_forward,score_forward,&max_ak); if (forward_max_ak < max_ak) forward_max_ak = max_ak; last_wf_forward = true; - // Check if they are close to collision + // Check end-reached and close-to-collision + if (reachability_quit) return wf_forward->align_status.status; if (forward_max_ak + reverse_max_ak >= max_antidiagonal) break; - // Compute-next & extend wavefront reverse + /* + * Compute next wavefront (Reverse) + */ ++score_reverse; - (*wf_align_compute)(wf_aligner_reverse,score_reverse); - max_ak = wavefront_extend_end2end_max(wf_aligner_reverse,score_reverse); + (*wf_align_compute)(wf_reverse,score_reverse); + if (plot_enabled) 
wavefront_plot(wf_reverse,score_reverse,align_level); // Plot + // Extend + reachability_quit = wavefront_extend_end2end_max(wf_reverse,score_reverse,&max_ak); if (reverse_max_ak < max_ak) reverse_max_ak = max_ak; last_wf_forward = false; + // Check end-reached and max-steps-reached + if (reachability_quit) return wf_reverse->align_status.status; + if (score_reverse + score_forward >= max_alignment_steps) return WF_STATUS_MAX_STEPS_REACHED; + // DEBUG + if (verbose >= 3 && score_forward % system->probe_interval_global == 0) { + wavefront_unialign_print_status(stderr,wf_forward,score_forward); + } } // Advance until overlap is found - const int max_score_scope = wf_aligner_forward->wf_components.max_score_scope; - const int gap_opening = wavefront_bialign_gap_opening_adjustment(wf_aligner_forward,distance_metric); + const int max_score_scope = wf_forward->wf_components.max_score_scope; + const int gap_opening = wavefront_bialign_overlap_gopen_adjust(wf_forward,distance_metric); while (true) { if (last_wf_forward) { // Check overlapping wavefronts const int min_score_reverse = (score_reverse > max_score_scope-1) ? score_reverse - (max_score_scope-1) : 0; if (score_forward + min_score_reverse - gap_opening >= breakpoint->score) break; // Done! 
- wavefront_bialign_overlap(wf_aligner_forward,wf_aligner_reverse,score_forward,score_reverse,true,breakpoint); - // Compute-next and extend reverse-wavefront + wavefront_bialign_overlap(wf_forward,wf_reverse,score_forward,score_reverse,true,breakpoint); + /* + * Compute next wavefront (Reverse) + */ ++score_reverse; - (*wf_align_compute)(wf_aligner_reverse,score_reverse); - wavefront_extend_end2end(wf_aligner_reverse,score_reverse); + (*wf_align_compute)(wf_reverse,score_reverse); + if (plot_enabled) wavefront_plot(wf_reverse,score_reverse,align_level); // Plot + // Extend & check end-reached + reachability_quit = wavefront_extend_end2end(wf_reverse,score_reverse); + if (reachability_quit) return wf_reverse->align_status.status; } // Check overlapping wavefronts const int min_score_forward = (score_forward > max_score_scope-1) ? score_forward - (max_score_scope-1) : 0; if (min_score_forward + score_reverse - gap_opening >= breakpoint->score) break; // Done! - wavefront_bialign_overlap(wf_aligner_reverse,wf_aligner_forward,score_reverse,score_forward,false,breakpoint); - // Compute-next and extend forward-wavefront + wavefront_bialign_overlap(wf_reverse,wf_forward,score_reverse,score_forward,false,breakpoint); + /* + * Compute next wavefront (Forward) + */ ++score_forward; - (*wf_align_compute)(wf_aligner_forward,score_forward); - wavefront_extend_end2end(wf_aligner_forward,score_forward); + (*wf_align_compute)(wf_forward,score_forward); + if (plot_enabled) wavefront_plot(wf_forward,score_forward,align_level); // Plot + // Extend & check end-reached/max-steps-reached + reachability_quit = wavefront_extend_end2end(wf_forward,score_forward); + if (reachability_quit) return wf_forward->align_status.status; + if (score_reverse + score_forward >= max_alignment_steps) return WF_STATUS_MAX_STEPS_REACHED; // Enable always last_wf_forward = true; } + // Return OK + return WF_STATUS_OK; +} +int wavefront_bialign_find_breakpoint_exception( + wavefront_aligner_t* const 
wf_aligner, + alignment_form_t* const form, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end, + const int align_level, + const int align_status) { + // Breakpoint was not found, check end reached + if (align_status == WF_STATUS_END_REACHED) { + wavefront_aligner_t* const wf_forward = wf_aligner->bialigner->wf_forward; + wavefront_aligner_t* const wf_reverse = wf_aligner->bialigner->wf_reverse; + // Retrieve score when end was reached + int score_reached; + if (wf_forward->align_status.status == WF_STATUS_END_REACHED) { + score_reached = wf_forward->align_status.score; + } else { + score_reached = wf_reverse->align_status.score; + } + // Fallback if possible + if (score_reached <= WF_BIALIGN_RECOVERY_MIN_SCORE) { + return wavefront_bialign_base(wf_aligner,form,component_begin,component_end,align_level); + } else { + return WF_STATUS_END_UNREACHABLE; // To no avail + } + } else { // Other unrecoverable conditions + return align_status; + } } /* * Bidirectional Alignment */ -int wavefront_align_unidirectional( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length); void wavefront_bialign_init_half_0( alignment_form_t* const global_form, alignment_form_t* const half_form) { @@ -440,6 +557,7 @@ void wavefront_bialign_init_half_0( global_form->text_begin_free > 0) ? alignment_endsfree : alignment_end2end; half_form->span = span_0; + half_form->extension = false; half_form->pattern_begin_free = global_form->pattern_begin_free; half_form->pattern_end_free = 0; half_form->text_begin_free = global_form->text_begin_free; @@ -454,74 +572,159 @@ void wavefront_bialign_init_half_1( global_form->text_begin_free > 0) ? 
alignment_endsfree : alignment_end2end; half_form->span = span_1; + half_form->extension = false; half_form->pattern_begin_free = 0; half_form->pattern_end_free = global_form->pattern_end_free; half_form->text_begin_free = 0; half_form->text_end_free = global_form->text_end_free; } -void wavefront_bialign( +int wavefront_bialign_alignment( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, alignment_form_t* const form, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end, const int score_remaining, - cigar_t* const cigar, - const int rlevel) { + const int align_level) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->bialigner->wf_forward->sequences; + const int pattern_begin = sequences->pattern_begin; + const int pattern_end = sequences->pattern_begin + sequences->pattern_length; + const int text_begin = sequences->text_begin; + const int text_end = sequences->text_begin + sequences->text_length; + const int pattern_length = pattern_end - pattern_begin; + const int text_length = text_end - text_begin; // Trivial cases if (text_length == 0) { - cigar_append_deletion(cigar,pattern_length); - return; + cigar_append_deletion(wf_aligner->cigar,pattern_length); + return WF_STATUS_OK; } else if (pattern_length == 0) { - cigar_append_insertion(cigar,text_length); - return; - } - // Fallback to regular WFA - if (score_remaining <= WF_BIALIGN_FALLBACK_MIN_SCORE) { - // Align the remaining - wf_aligner->component_begin = component_begin; - wf_aligner->component_end = component_end; - wf_aligner->alignment_form = *form; - wavefront_align_unidirectional(wf_aligner, - pattern,pattern_length, - text,text_length); - cigar_append(cigar,&wf_aligner->cigar); - return; + cigar_append_insertion(wf_aligner->cigar,text_length); + return WF_STATUS_OK; + } else if (score_remaining <= WF_BIALIGN_FALLBACK_MIN_SCORE) { + // Fall back to 
regular WFA + return wavefront_bialign_base(wf_aligner,form, + component_begin,component_end,align_level); } // Find breakpoint in the alignment wf_bialign_breakpoint_t breakpoint; - wavefront_bialign_find_breakpoint( - wf_aligner->aligner_forward,wf_aligner->aligner_reverse, - pattern,pattern_length,text,text_length, - wf_aligner->penalties.distance_metric, - form,component_begin,component_end,&breakpoint); + int align_status = wavefront_bialign_find_breakpoint( + wf_aligner->bialigner,wf_aligner->penalties.distance_metric, + form,component_begin,component_end,&breakpoint,align_level); + // DEBUG + if (wf_aligner->system.verbose >= 2) { + wf_aligner->bialigner->wf_forward->align_status.status = align_status; + wf_aligner->bialigner->wf_reverse->align_status.status = align_status; + wavefront_debug_end(wf_aligner->bialigner->wf_forward); + wavefront_debug_end(wf_aligner->bialigner->wf_reverse); + } + // Check status + if (align_status != WF_STATUS_OK) { + return wavefront_bialign_find_breakpoint_exception( + wf_aligner,form,component_begin,component_end,align_level,align_status); + } + // Breakpoint found const int breakpoint_h = WAVEFRONT_H(breakpoint.k_forward,breakpoint.offset_forward); const int breakpoint_v = WAVEFRONT_V(breakpoint.k_forward,breakpoint.offset_forward); // DEBUG - if (wf_aligner->system.verbose >= 2) wavefront_bialign_debug(&breakpoint,rlevel); + if (wf_aligner->system.verbose >= 3) wavefront_bialign_debug(&breakpoint,align_level); // Align half_0 alignment_form_t form_0; + wavefront_bialigner_set_sequences_bounds(wf_aligner->bialigner, + pattern_begin,pattern_begin+breakpoint_v, + text_begin,text_begin+breakpoint_h); wavefront_bialign_init_half_0(form,&form_0); - wavefront_bialign( - wf_aligner,pattern,breakpoint_v,text,breakpoint_h, + align_status = wavefront_bialign_alignment(wf_aligner, &form_0,component_begin,breakpoint.component, - breakpoint.score_forward,cigar,rlevel+1); + breakpoint.score_forward,align_level+1); + if (align_status != 
WF_STATUS_OK) return align_status; // Align half_1 alignment_form_t form_1; + wavefront_bialigner_set_sequences_bounds(wf_aligner->bialigner, + pattern_begin+breakpoint_v,pattern_end, + text_begin+breakpoint_h,text_end); wavefront_bialign_init_half_1(form,&form_1); - wavefront_bialign(wf_aligner, - pattern+breakpoint_v,pattern_length-breakpoint_v, - text+breakpoint_h,text_length-breakpoint_h, + align_status = wavefront_bialign_alignment(wf_aligner, &form_1,breakpoint.component,component_end, - breakpoint.score_reverse,cigar,rlevel+1); - // Set score - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - const int swg_match_score = wf_aligner->penalties.match; - cigar->score = (distance_metric <= edit) ? breakpoint.score : - WF_PENALTIES_GET_SW_SCORE(swg_match_score,pattern_length,text_length,breakpoint.score); + breakpoint.score_reverse,align_level+1); + if (align_status != WF_STATUS_OK) return align_status; + // Set score (Strictly speaking, only needed at level-0) + if (align_level == 0) { + cigar_t* const cigar = wf_aligner->cigar; + cigar->score = wavefront_compute_classic_score(wf_aligner,pattern_length,text_length,breakpoint.score); + cigar->end_v = pattern_length; + cigar->end_h = text_length; + } + return WF_STATUS_OK; // All good +} +/* + * Bidirectional Score-only + */ +int wavefront_bialign_compute_score( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_aligner_t* const wf_forward = wf_aligner->bialigner->wf_forward; + wavefront_aligner_t* const wf_reverse = wf_aligner->bialigner->wf_reverse; + wavefront_sequences_t* const sequences = &wf_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; + // Clear cigar + cigar_clear(wf_aligner->cigar); + // Find breakpoint in the alignment + wf_bialign_breakpoint_t breakpoint; + const int align_status = wavefront_bialign_find_breakpoint(wf_aligner->bialigner, + 
wf_aligner->penalties.distance_metric,&wf_aligner->alignment_form, + affine_matrix_M,affine_matrix_M,&breakpoint,0); + // DEBUG + if (wf_aligner->system.verbose >= 2) { + wavefront_debug_end(wf_forward); + wavefront_debug_end(wf_reverse); + } + // Check status + cigar_t* const cigar = wf_aligner->cigar; + if (align_status == WF_STATUS_OK || align_status == WF_STATUS_END_REACHED) { + if (align_status == WF_STATUS_END_REACHED) { + breakpoint.score = (wf_forward->align_status.status == WF_STATUS_END_REACHED) ? + wf_forward->align_status.score : wf_reverse->align_status.score; + } + // Set status & score + cigar->score = wavefront_compute_classic_score(wf_aligner,pattern_length,text_length,breakpoint.score); + cigar->end_v = pattern_length; + cigar->end_h = text_length; + // Return OK + return WF_STATUS_OK; + } else { + // Other cases + return align_status; + } +} +/* + * Bidirectional dispatcher + */ +void wavefront_bialign( + wavefront_aligner_t* const wf_aligner) { + // Select scope + int align_status; + if (wf_aligner->alignment_scope == compute_score) { + align_status = wavefront_bialign_compute_score(wf_aligner); + } else { + // Resize CIGAR + wavefront_sequences_t* const sequences = &wf_aligner->bialigner->wf_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; + cigar_resize(wf_aligner->cigar,2*(pattern_length+text_length)); // Resize & clear + // Bidirectional alignment + const bool min_length = MAX(pattern_length,text_length) <= WF_BIALIGN_FALLBACK_MIN_LENGTH; + align_status = wavefront_bialign_alignment(wf_aligner, + &wf_aligner->alignment_form, + affine_matrix_M,affine_matrix_M, + min_length ? 
0 : INT_MAX,0); + } + // Check status + if (align_status == WF_STATUS_OK) { + wf_aligner->align_status.status = WF_STATUS_ALG_COMPLETED; + } else if (align_status == WF_STATUS_MAX_STEPS_REACHED || align_status == WF_STATUS_OOM) { + wf_aligner->align_status.status = align_status; + } else { // Other cases + wf_aligner->align_status.status = WF_STATUS_UNATTAINABLE; + } } - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_bialign.h b/pywfa/WFA2_lib/wavefront/wavefront_bialign.h index f740761..7a96a0d 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_bialign.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_bialign.h @@ -28,45 +28,15 @@ * AUTHOR(S): Santiago Marco-Sola */ -#ifndef WAVEFRONT_WAVEFRONT_BIALIGN_H_ -#define WAVEFRONT_WAVEFRONT_BIALIGN_H_ +#ifndef WAVEFRONT_BIALIGN_H_ +#define WAVEFRONT_BIALIGN_H_ -#include "utils/commons.h" -#include "alignment/affine_penalties.h" -#include "alignment/cigar.h" -#include "wavefront_offset.h" -#include "wavefront_attributes.h" - -// Wavefront ahead definition -typedef struct _wavefront_aligner_t wavefront_aligner_t; - -typedef struct { - // Scores - int score; // Score total - int score_forward; // Score (forward) - int score_reverse; // Score (reverse) - // Location - int k_forward; // Breakpoint diagonal (forward) - int k_reverse; // Breakpoint diagonal (reverse) - wf_offset_t offset_forward; // Offset (forward) - wf_offset_t offset_reverse; // Offset (reverse) - affine2p_matrix_type component; // Component (M/I/D) -} wf_bialign_breakpoint_t; +#include "wavefront_aligner.h" /* * Bidirectional WFA */ void wavefront_bialign( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_form_t* const form, - const affine2p_matrix_type component_begin, - const affine2p_matrix_type component_end, - const int score_remaining, - cigar_t* const cigar, - const int rlevel); + wavefront_aligner_t* const wf_aligner); -#endif /* 
WAVEFRONT_WAVEFRONT_BIALIGN_H_ */ +#endif /* WAVEFRONT_BIALIGN_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_bialigner.c b/pywfa/WFA2_lib/wavefront/wavefront_bialigner.c new file mode 100644 index 0000000..f8eb7d1 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_bialigner.c @@ -0,0 +1,199 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#include "utils/commons.h" +#include "wavefront_bialigner.h" +#include "wavefront_aligner.h" +#include "wavefront_attributes.h" +#include "wavefront_heuristic.h" + +/* + * Setup + */ +wavefront_bialigner_t* wavefront_bialigner_new( + wavefront_aligner_attr_t* const attributes, + wavefront_plot_t* const plot) { + // Allocate + wavefront_bialigner_t* const wf_bialigner = malloc(sizeof(wavefront_bialigner_t)); + // Configure subsidiary aligners + wavefront_aligner_attr_t subsidiary_attr = wavefront_aligner_attr_default; + // Inherit attributes from master aligner + subsidiary_attr.distance_metric = attributes->distance_metric; + subsidiary_attr.linear_penalties = attributes->linear_penalties; + subsidiary_attr.affine_penalties = attributes->affine_penalties; + subsidiary_attr.affine2p_penalties = attributes->affine2p_penalties; + // Set specifics for subsidiary aligners + subsidiary_attr.heuristic = attributes->heuristic; // Inherit same heuristic + subsidiary_attr.memory_mode = wavefront_memory_high; // Classic WFA + subsidiary_attr.alignment_scope = compute_score; // BiWFAs are score-only + subsidiary_attr.alignment_form.extension = false; // Deactivate extension mode + // Set other parameter for subsidiary aligners + subsidiary_attr.system = attributes->system; // Inherit system configuration + // Allocate forward/reverse aligners + wf_bialigner->wf_forward = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->wf_forward->align_mode = wf_align_biwfa_breakpoint_forward; + wf_bialigner->wf_forward->plot = plot; + wf_bialigner->wf_reverse = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->wf_reverse->align_mode = wf_align_biwfa_breakpoint_reverse; + wf_bialigner->wf_reverse->plot = plot; + // Allocate subsidiary aligner + subsidiary_attr.alignment_scope = compute_alignment; + subsidiary_attr.heuristic.strategy = wf_heuristic_none; // Not inherited + 
wf_bialigner->wf_base = wavefront_aligner_new(&subsidiary_attr); + wf_bialigner->wf_base->align_mode = wf_align_biwfa_subsidiary; + wf_bialigner->wf_base->plot = plot; + // Return + return wf_bialigner; +} +void wavefront_bialigner_reap( + wavefront_bialigner_t* const wf_bialigner) { + wavefront_aligner_reap(wf_bialigner->wf_forward); + wavefront_aligner_reap(wf_bialigner->wf_reverse); + wavefront_aligner_reap(wf_bialigner->wf_base); +} +void wavefront_bialigner_delete( + wavefront_bialigner_t* const wf_bialigner) { + wavefront_aligner_delete(wf_bialigner->wf_forward); + wavefront_aligner_delete(wf_bialigner->wf_reverse); + wavefront_aligner_delete(wf_bialigner->wf_base); + free(wf_bialigner); +} +/* + * Sequences + */ +void wavefront_bialigner_set_sequences_ascii( + wavefront_bialigner_t* const wf_bialigner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length) { + wavefront_sequences_init_ascii( + &wf_bialigner->wf_forward->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_sequences_init_ascii( + &wf_bialigner->wf_reverse->sequences, + pattern,pattern_length,text,text_length,true); + wavefront_sequences_init_ascii( + &wf_bialigner->wf_base->sequences, + pattern,pattern_length,text,text_length,false); +} +void wavefront_bialigner_set_sequences_lambda( + wavefront_bialigner_t* const wf_bialigner, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length) { + wavefront_sequences_init_lambda(&wf_bialigner->wf_forward->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,false); + wavefront_sequences_init_lambda(&wf_bialigner->wf_reverse->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,true); + wavefront_sequences_init_lambda(&wf_bialigner->wf_base->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,false); +} +void 
wavefront_bialigner_set_sequences_packed2bits( + wavefront_bialigner_t* const wf_bialigner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length) { + wavefront_sequences_init_packed2bits( + &wf_bialigner->wf_forward->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_sequences_init_packed2bits( + &wf_bialigner->wf_reverse->sequences, + pattern,pattern_length,text,text_length,true); + wavefront_sequences_init_packed2bits( + &wf_bialigner->wf_base->sequences, + pattern,pattern_length,text,text_length,false); +} +void wavefront_bialigner_set_sequences_bounds( + wavefront_bialigner_t* const wf_bialigner, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end) { + wavefront_sequences_set_bounds( + &wf_bialigner->wf_forward->sequences, + pattern_begin,pattern_end,text_begin,text_end); + wavefront_sequences_set_bounds( + &wf_bialigner->wf_reverse->sequences, + pattern_begin,pattern_end,text_begin,text_end); + wavefront_sequences_set_bounds( + &wf_bialigner->wf_base->sequences, + pattern_begin,pattern_end,text_begin,text_end); +} +/* + * Accessors + */ +uint64_t wavefront_bialigner_get_size( + wavefront_bialigner_t* const wf_bialigner) { + return wavefront_aligner_get_size(wf_bialigner->wf_forward) + + wavefront_aligner_get_size(wf_bialigner->wf_reverse) + + wavefront_aligner_get_size(wf_bialigner->wf_base); +} +void wavefront_bialigner_set_heuristic( + wavefront_bialigner_t* const wf_bialigner, + wavefront_heuristic_t* const heuristic) { + wf_bialigner->wf_forward->heuristic = *heuristic; + wf_bialigner->wf_reverse->heuristic = *heuristic; + // Heuristics are not inherited to wf_base +} +void wavefront_bialigner_set_max_alignment_steps( + wavefront_bialigner_t* const wf_bialigner, + const int max_alignment_steps) { + wf_bialigner->wf_forward->system.max_alignment_steps = max_alignment_steps; + wf_bialigner->wf_reverse->system.max_alignment_steps = 
max_alignment_steps; + wf_bialigner->wf_base->system.max_alignment_steps = max_alignment_steps; +} +void wavefront_bialigner_set_max_memory( + wavefront_bialigner_t* const wf_bialigner, + const uint64_t max_memory_resident, + const uint64_t max_memory_abort) { + wf_bialigner->wf_forward->system.max_memory_resident = max_memory_resident; + wf_bialigner->wf_forward->system.max_memory_abort = max_memory_abort; + wf_bialigner->wf_reverse->system.max_memory_resident = max_memory_resident; + wf_bialigner->wf_reverse->system.max_memory_abort = max_memory_abort; + wf_bialigner->wf_base->system.max_memory_resident = max_memory_resident; + wf_bialigner->wf_base->system.max_memory_abort = max_memory_abort; +} +void wavefront_bialigner_set_max_num_threads( + wavefront_bialigner_t* const wf_bialigner, + const int max_num_threads) { + wf_bialigner->wf_forward->system.max_num_threads = max_num_threads; + wf_bialigner->wf_reverse->system.max_num_threads = max_num_threads; + wf_bialigner->wf_base->system.max_num_threads = max_num_threads; +} +void wavefront_bialigner_set_min_offsets_per_thread( + wavefront_bialigner_t* const wf_bialigner, + const int min_offsets_per_thread) { + wf_bialigner->wf_forward->system.min_offsets_per_thread = min_offsets_per_thread; + wf_bialigner->wf_reverse->system.min_offsets_per_thread = min_offsets_per_thread; + wf_bialigner->wf_base->system.min_offsets_per_thread = min_offsets_per_thread; +} diff --git a/pywfa/WFA2_lib/wavefront/wavefront_bialigner.h b/pywfa/WFA2_lib/wavefront/wavefront_bialigner.h new file mode 100644 index 0000000..22d64db --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_bialigner.h @@ -0,0 +1,125 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#ifndef WAVEFRONT_BIALIGNER_H_ +#define WAVEFRONT_BIALIGNER_H_ + +#include "wavefront_penalties.h" +#include "wavefront_attributes.h" +#include "wavefront_heuristic.h" +#include "wavefront_offset.h" +#include "wavefront_sequences.h" + +// Wavefront ahead definition +typedef struct _wavefront_aligner_t wavefront_aligner_t; + +typedef struct { + // Scores + int score; // Score total + int score_forward; // Score (forward) + int score_reverse; // Score (reverse) + // Location + int k_forward; // Breakpoint diagonal (forward) + int k_reverse; // Breakpoint diagonal (reverse) + wf_offset_t offset_forward; // Offset (forward) + wf_offset_t offset_reverse; // Offset (reverse) + affine2p_matrix_type component; // Component (M/I/D) +} wf_bialign_breakpoint_t; + +typedef struct { + // Wavefronts + wavefront_aligner_t* wf_forward; // Breakpoint Forward aligner + wavefront_aligner_t* wf_reverse; // Breakpoint Reverse aligner + wavefront_aligner_t* wf_base; // Base/Subsidiary aligner + // Operators + void (*wf_align_compute)(wavefront_aligner_t* const,const int); +} wavefront_bialigner_t; + +/* + * Setup + */ +wavefront_bialigner_t* wavefront_bialigner_new( + wavefront_aligner_attr_t* const attributes, + wavefront_plot_t* const plot); +void wavefront_bialigner_reap( + wavefront_bialigner_t* const wf_bialigner); +void wavefront_bialigner_delete( + wavefront_bialigner_t* const wf_bialigner); + +/* + * Sequences + */ +void wavefront_bialigner_set_sequences_ascii( + wavefront_bialigner_t* const wf_bialigner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length); +void wavefront_bialigner_set_sequences_lambda( + wavefront_bialigner_t* const wf_bialigner, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +void wavefront_bialigner_set_sequences_packed2bits( + 
wavefront_bialigner_t* const wf_bialigner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); +void wavefront_bialigner_set_sequences_bounds( + wavefront_bialigner_t* const wf_bialigner, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end); + +/* + * Accessors + */ +uint64_t wavefront_bialigner_get_size( + wavefront_bialigner_t* const wf_bialigner); +void wavefront_bialigner_set_heuristic( + wavefront_bialigner_t* const wf_bialigner, + wavefront_heuristic_t* const heuristic); +void wavefront_bialigner_set_max_alignment_steps( + wavefront_bialigner_t* const wf_bialigner, + const int max_alignment_steps); +void wavefront_bialigner_set_max_memory( + wavefront_bialigner_t* const wf_bialigner, + const uint64_t max_memory_resident, + const uint64_t max_memory_abort); +void wavefront_bialigner_set_max_num_threads( + wavefront_bialigner_t* const wf_bialigner, + const int max_num_threads); +void wavefront_bialigner_set_min_offsets_per_thread( + wavefront_bialigner_t* const wf_bialigner, + const int min_offsets_per_thread); +#endif /* WAVEFRONT_BIALIGNER_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_components.c b/pywfa/WFA2_lib/wavefront/wavefront_components.c index 6df52d6..9512be4 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_components.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_components.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront aligner components */ +#include "utils/commons.h" #include "wavefront_components.h" #include "utils/bitmap.h" #include "system/profiler_timer.h" @@ -55,12 +56,12 @@ void wavefront_components_dimensions_edit( if (wf_components->memory_modular) { *num_wavefronts = 2; } else { - *num_wavefronts = MAX(max_pattern_length,max_text_length); + *num_wavefronts = MAX(max_pattern_length,max_text_length) + 1; } } void wavefront_components_dimensions_linear( wavefront_components_t* const wf_components, - wavefronts_penalties_t* const 
penalties, + wavefront_penalties_t* const penalties, const int max_pattern_length, const int max_text_length, int* const max_score_scope, @@ -74,12 +75,12 @@ void wavefront_components_dimensions_linear( const int abs_seq_diff = ABS(max_pattern_length-max_text_length); const int max_score_misms = MIN(max_pattern_length,max_text_length) * penalties->mismatch; const int max_score_indel = penalties->gap_opening1 * abs_seq_diff; - *num_wavefronts = max_score_misms + max_score_indel; + *num_wavefronts = max_score_misms + max_score_indel + 1; } } void wavefront_components_dimensions_affine( wavefront_components_t* const wf_components, - wavefronts_penalties_t* const penalties, + wavefront_penalties_t* const penalties, const int max_pattern_length, const int max_text_length, int* const max_score_scope, @@ -94,12 +95,12 @@ void wavefront_components_dimensions_affine( const int abs_seq_diff = ABS(max_pattern_length-max_text_length); const int max_score_misms = MIN(max_pattern_length,max_text_length) * penalties->mismatch; const int max_score_indel = penalties->gap_opening1 + abs_seq_diff * penalties->gap_extension1; - *num_wavefronts = max_score_misms + max_score_indel; + *num_wavefronts = max_score_misms + max_score_indel + 1; } } void wavefront_components_dimensions_affine2p( wavefront_components_t* const wf_components, - wavefronts_penalties_t* const penalties, + wavefront_penalties_t* const penalties, const int max_pattern_length, const int max_text_length, int* const max_score_scope, @@ -118,12 +119,12 @@ void wavefront_components_dimensions_affine2p( const int max_score_indel1 = penalties->gap_opening1 + abs_seq_diff * penalties->gap_extension1; const int max_score_indel2 = penalties->gap_opening2 + abs_seq_diff * penalties->gap_extension2; const int max_score_indel = MIN(max_score_indel1,max_score_indel2); - *num_wavefronts = max_score_misms + max_score_indel; + *num_wavefronts = max_score_misms + max_score_indel + 1; } } void wavefront_components_dimensions( 
wavefront_components_t* const wf_components, - wavefronts_penalties_t* const penalties, + wavefront_penalties_t* const penalties, const int max_pattern_length, const int max_text_length, int* const max_score_scope, @@ -195,7 +196,7 @@ void wavefront_components_allocate( wavefront_components_t* const wf_components, const int max_pattern_length, const int max_text_length, - wavefronts_penalties_t* const penalties, + wavefront_penalties_t* const penalties, const bool memory_modular, const bool bt_piggyback, mm_allocator_t* const mm_allocator) { @@ -279,7 +280,7 @@ void wavefront_components_resize( wavefront_components_t* const wf_components, const int max_pattern_length, const int max_text_length, - wavefronts_penalties_t* const penalties) { + wavefront_penalties_t* const penalties) { // Compute dimensions int num_wavefronts = 0; wavefront_components_dimensions( @@ -431,10 +432,10 @@ void wavefront_components_translate_wavefronts( void wavefront_components_compact_bt_buffer( wavefront_components_t* const wf_components, const int score, - const bool verbose) { + const int verbose) { // PROFILE profiler_timer_t timer; - if (verbose) { timer_reset(&timer); timer_start(&timer); } + if (verbose >= 3) { timer_reset(&timer); timer_start(&timer); } // Parameters wf_backtrace_buffer_t* const bt_buffer = wf_components->bt_buffer; const uint64_t bt_buffer_used = wf_backtrace_buffer_get_used(bt_buffer); @@ -452,11 +453,10 @@ void wavefront_components_compact_bt_buffer( // Free bitmap_delete(bitmap); // PROFILE - if (verbose) { + if (verbose >= 3) { timer_stop(&timer); fprintf(stderr,"["); timer_print_total(stderr,&timer); fprintf(stderr,"]\n"); } } - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_components.h b/pywfa/WFA2_lib/wavefront/wavefront_components.h index c77602c..4d1856e 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_components.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_components.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_WAVEFRONT_COMPONENTS_H_ #define 
WAVEFRONT_WAVEFRONT_COMPONENTS_H_ -#include "utils/commons.h" #include "wavefront/wavefront.h" #include "wavefront/wavefront_backtrace_buffer.h" #include "wavefront/wavefront_penalties.h" @@ -70,7 +69,7 @@ void wavefront_components_allocate( wavefront_components_t* const wf_components, const int max_pattern_length, const int max_text_length, - wavefronts_penalties_t* const penalties, + wavefront_penalties_t* const penalties, const bool memory_modular, const bool bt_piggyback, mm_allocator_t* const mm_allocator); @@ -88,7 +87,7 @@ void wavefront_components_resize( wavefront_components_t* const wf_components, const int max_pattern_length, const int max_text_length, - wavefronts_penalties_t* const penalties); + wavefront_penalties_t* const penalties); void wavefront_components_resize_null__victim( wavefront_components_t* const wf_components, const int lo, @@ -100,6 +99,6 @@ void wavefront_components_resize_null__victim( void wavefront_components_compact_bt_buffer( wavefront_components_t* const wf_components, const int score, - const bool verbose); + const int verbose); #endif /* WAVEFRONT_WAVEFRONT_COMPONENTS_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute.c b/pywfa/WFA2_lib/wavefront/wavefront_compute.c index 5db353e..7aff600 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute.c @@ -29,14 +29,15 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "alignment/affine2p_penalties.h" #include "wavefront_compute.h" /* * Compute limits */ -void wavefront_compute_limits( +void wavefront_compute_limits_input( wavefront_aligner_t* const wf_aligner, const wavefront_set_t* const wavefront_set, int* const lo, @@ -45,9 +46,10 @@ void wavefront_compute_limits( const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; const wavefront_t* const m_misms = 
wavefront_set->in_mwavefront_misms; const wavefront_t* const m_open1 = wavefront_set->in_mwavefront_open1; - // Gap-linear + // Init int min_lo = m_misms->lo; int max_hi = m_misms->hi; + // Gap-linear if (min_lo > m_open1->lo-1) min_lo = m_open1->lo-1; if (max_hi < m_open1->hi+1) max_hi = m_open1->hi+1; if (distance_metric == gap_linear) { @@ -82,48 +84,216 @@ void wavefront_compute_limits( *lo = min_lo; *hi = max_hi; } +void wavefront_compute_limits_output( + wavefront_aligner_t* const wf_aligner, + const int lo, + const int hi, + int* const effective_lo, + int* const effective_hi) { + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const int max_score_scope = wf_components->max_score_scope; + // Add padding to avoid compute-kernel peeling + const int eff_lo = lo - (max_score_scope + 1); + const int eff_hi = hi + (max_score_scope + 1); + // Consider historic (to avoid errors using heuristics) + *effective_lo = MIN(eff_lo,wf_components->historic_min_lo); + *effective_hi = MAX(eff_hi,wf_components->historic_max_hi); + wf_components->historic_min_lo = *effective_lo; + wf_components->historic_max_hi = *effective_hi; +} +/* + * Score translation + */ +int wavefront_compute_classic_score( + wavefront_aligner_t* const wf_aligner, + const int pattern_length, + const int text_length, + const int wf_score) { + // Parameters + const int swg_match = -(wf_aligner->penalties.match); + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + // Adapt score + if (distance_metric <= edit) return wf_score; + if (swg_match == 0) return -wf_score; + return WF_SCORE_TO_SW_SCORE(swg_match,pattern_length,text_length,wf_score); +} +/* + * Compute ends-free init conditions + */ +bool wavefront_compute_endsfree_required( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + alignment_form_t* const alg_form = &wf_aligner->alignment_form; + wavefront_penalties_t* const penalties = 
&wf_aligner->penalties; + // Return if ends-free initialization is required + if (penalties->match == 0) return false; + if (alg_form->span != alignment_endsfree) return false; + if (alg_form->text_begin_free == 0 && + alg_form->pattern_begin_free == 0) return false; + if (score % (-penalties->match) != 0) return false; + // Ok + return true; +} +void wavefront_compute_endsfree_limits( + wavefront_aligner_t* const wf_aligner, + const int score, + int* const lo, + int* const hi) { + // Parameters + alignment_form_t* const alg_form = &wf_aligner->alignment_form; + wavefront_penalties_t* const penalties = &wf_aligner->penalties; + // Consider ends-free conditions + const int endsfree_k = score/(-penalties->match); + *hi = (alg_form->text_begin_free >= endsfree_k) ? endsfree_k : INT_MIN; + *lo = (alg_form->pattern_begin_free >= endsfree_k) ? -endsfree_k : INT_MAX; +} +void wavefront_compute_endsfree_init_offset( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const wavefront, + const int k, + const int v, + const int h) { + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + wf_offset_t* const offsets = wavefront->offsets; + // Set offset + offsets[k] = DPMATRIX_OFFSET(h,v); + if (wf_components->bt_piggyback) { + wavefront->bt_pcigar[k] = 0; + wavefront->bt_prev[k] = + wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,h); + } +} +void wavefront_compute_endsfree_init( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const wavefront, + const int score) { + // Parameters + alignment_form_t* const alg_form = &wf_aligner->alignment_form; + wavefront_penalties_t* const penalties = &wf_aligner->penalties; + const int lo = wavefront->lo; + const int hi = wavefront->hi; + // Consider ends-free conditions + int endsfree_k = score/(-penalties->match); + wf_offset_t* const offsets = wavefront->offsets; + // Consider text begin-free + int k; + if (alg_form->text_begin_free >= endsfree_k) { + if (hi >= endsfree_k) { + if 
(offsets[endsfree_k] <= DPMATRIX_OFFSET(endsfree_k,0)) { + wavefront_compute_endsfree_init_offset(wf_aligner,wavefront,endsfree_k,0,endsfree_k); + } + } else { + for (k=hi+1;khi = endsfree_k; + } + } + // Consider pattern begin-free + if (alg_form->pattern_begin_free >= endsfree_k) { + endsfree_k = -endsfree_k; + if (lo <= endsfree_k) { + if (offsets[endsfree_k] <= DPMATRIX_OFFSET(0,endsfree_k)) { + wavefront_compute_endsfree_init_offset(wf_aligner,wavefront,endsfree_k,-endsfree_k,0); + } + } else { + wavefront_compute_endsfree_init_offset(wf_aligner,wavefront,endsfree_k,-endsfree_k,0); + for (k=endsfree_k+1;klo = endsfree_k; + } + } +} +wavefront_t* wavefront_compute_endsfree_allocate_null( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + alignment_form_t* const alg_form = &wf_aligner->alignment_form; + wavefront_penalties_t* const penalties = &wf_aligner->penalties; + // Consider ends-free conditions + const int endsfree_k = score/(-penalties->match); + const bool text_begin_free = (alg_form->text_begin_free >= endsfree_k); + const bool pattern_begin_free = (alg_form->pattern_begin_free >= endsfree_k); + int lo = 0, hi = 0; + if (text_begin_free && pattern_begin_free) { + lo = -endsfree_k; + hi = endsfree_k; + } else if (text_begin_free) { + lo = endsfree_k; + hi = endsfree_k; + } else if (pattern_begin_free) { + lo = -endsfree_k; + hi = -endsfree_k; + } + // Compute effective hi/lo dimensions + int effective_lo, effective_hi; + wavefront_compute_limits_output(wf_aligner,lo,hi,&effective_lo,&effective_hi); + // Allocate & initialize + wavefront_t* const wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_offset_t* const offsets = wavefront->offsets; + int k; + for (k=lo+1;klo = lo; + wavefront->hi = hi; + // Return + return wavefront; +} /* * Input wavefronts (fetch) */ wavefront_t* wavefront_compute_get_mwavefront( 
wavefront_components_t* const wf_components, - const int score) { - return (score < 0 || - wf_components->mwavefronts[score] == NULL || - wf_components->mwavefronts[score]->null) ? - wf_components->wavefront_null : wf_components->mwavefronts[score]; + const int score_mod) { + return (score_mod < 0 || + wf_components->mwavefronts[score_mod] == NULL || + wf_components->mwavefronts[score_mod]->null) ? + wf_components->wavefront_null : wf_components->mwavefronts[score_mod]; } wavefront_t* wavefront_compute_get_i1wavefront( wavefront_components_t* const wf_components, - const int score) { - return (score < 0 || - wf_components->i1wavefronts[score] == NULL || - wf_components->i1wavefronts[score]->null) ? - wf_components->wavefront_null : wf_components->i1wavefronts[score]; + const int score_mod) { + return (score_mod < 0 || + wf_components->i1wavefronts[score_mod] == NULL || + wf_components->i1wavefronts[score_mod]->null) ? + wf_components->wavefront_null : wf_components->i1wavefronts[score_mod]; } wavefront_t* wavefront_compute_get_i2wavefront( wavefront_components_t* const wf_components, - const int score) { - return (score < 0 || - wf_components->i2wavefronts[score] == NULL || - wf_components->i2wavefronts[score]->null) ? - wf_components->wavefront_null : wf_components->i2wavefronts[score]; + const int score_mod) { + return (score_mod < 0 || + wf_components->i2wavefronts[score_mod] == NULL || + wf_components->i2wavefronts[score_mod]->null) ? + wf_components->wavefront_null : wf_components->i2wavefronts[score_mod]; } wavefront_t* wavefront_compute_get_d1wavefront( wavefront_components_t* const wf_components, - const int score) { - return (score < 0 || - wf_components->d1wavefronts[score] == NULL || - wf_components->d1wavefronts[score]->null) ? - wf_components->wavefront_null : wf_components->d1wavefronts[score]; + const int score_mod) { + return (score_mod < 0 || + wf_components->d1wavefronts[score_mod] == NULL || + wf_components->d1wavefronts[score_mod]->null) ? 
+ wf_components->wavefront_null : wf_components->d1wavefronts[score_mod]; } wavefront_t* wavefront_compute_get_d2wavefront( wavefront_components_t* const wf_components, - const int score) { - return (score < 0 || - wf_components->d2wavefronts[score] == NULL || - wf_components->d2wavefronts[score]->null) ? - wf_components->wavefront_null : wf_components->d2wavefronts[score]; + const int score_mod) { + return (score_mod < 0 || + wf_components->d2wavefronts[score_mod] == NULL || + wf_components->d2wavefronts[score_mod]->null) ? + wf_components->wavefront_null : wf_components->d2wavefronts[score_mod]; } void wavefront_compute_fetch_input( wavefront_aligner_t* const wf_aligner, @@ -133,7 +303,7 @@ void wavefront_compute_fetch_input( wavefront_components_t* const wf_components = &wf_aligner->wf_components; const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; // Compute scores - const wavefronts_penalties_t* const penalties = &(wf_aligner->penalties); + const wavefront_penalties_t* const penalties = &(wf_aligner->penalties); if (distance_metric == gap_linear) { int mismatch = score - penalties->mismatch; int gap_open1 = score - penalties->gap_opening1; @@ -177,111 +347,141 @@ void wavefront_compute_fetch_input( */ void wavefront_compute_free_output( wavefront_aligner_t* const wf_aligner, - const int score) { + const int score_mod) { // Parameters wavefront_components_t* const wf_components = &wf_aligner->wf_components; const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; // Free - if (wf_components->mwavefronts[score]) wavefront_slab_free(wavefront_slab,wf_components->mwavefronts[score]); + if (wf_components->mwavefronts[score_mod]) { + wavefront_slab_free(wavefront_slab,wf_components->mwavefronts[score_mod]); + } if (distance_metric == gap_linear) return; - if (wf_components->i1wavefronts[score]) 
wavefront_slab_free(wavefront_slab,wf_components->i1wavefronts[score]); - if (wf_components->d1wavefronts[score]) wavefront_slab_free(wavefront_slab,wf_components->d1wavefronts[score]); + if (wf_components->i1wavefronts[score_mod]) { + wavefront_slab_free(wavefront_slab,wf_components->i1wavefronts[score_mod]); + } + if (wf_components->d1wavefronts[score_mod]) { + wavefront_slab_free(wavefront_slab,wf_components->d1wavefronts[score_mod]); + } if (distance_metric == gap_affine) return; - if (wf_components->i2wavefronts[score]) wavefront_slab_free(wavefront_slab,wf_components->i2wavefronts[score]); - if (wf_components->d2wavefronts[score]) wavefront_slab_free(wavefront_slab,wf_components->d2wavefronts[score]); + if (wf_components->i2wavefronts[score_mod]) { + wavefront_slab_free(wavefront_slab,wf_components->i2wavefronts[score_mod]); + } + if (wf_components->d2wavefronts[score_mod]) { + wavefront_slab_free(wavefront_slab,wf_components->d2wavefronts[score_mod]); + } } void wavefront_compute_allocate_output_null( wavefront_aligner_t* const wf_aligner, - int score) { + const int score) { // Parameters - wavefront_components_t* const wf_components = &wf_aligner->wf_components; const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; // Modular wavefront + int score_mod = score; if (wf_components->memory_modular) { - score = score % wf_components->max_score_scope; - wavefront_compute_free_output(wf_aligner,score); + score_mod = score % wf_components->max_score_scope; + wavefront_compute_free_output(wf_aligner,score_mod); + } + // Consider ends-free (M!=0) + if (wavefront_compute_endsfree_required(wf_aligner,score)) { + wf_components->mwavefronts[score_mod] = + wavefront_compute_endsfree_allocate_null(wf_aligner,score); + } else { + wf_components->mwavefronts[score_mod] = NULL; } // Nullify Wavefronts - wf_components->mwavefronts[score] = NULL; if (distance_metric == 
gap_linear) return; - wf_components->i1wavefronts[score] = NULL; - wf_components->d1wavefronts[score] = NULL; + wf_components->i1wavefronts[score_mod] = NULL; + wf_components->d1wavefronts[score_mod] = NULL; if (distance_metric == gap_affine) return; - wf_components->i2wavefronts[score] = NULL; - wf_components->d2wavefronts[score] = NULL; + wf_components->i2wavefronts[score_mod] = NULL; + wf_components->d2wavefronts[score_mod] = NULL; } void wavefront_compute_allocate_output( wavefront_aligner_t* const wf_aligner, wavefront_set_t* const wavefront_set, - int score, + const int score, const int lo, const int hi) { // Parameters wavefront_components_t* const wf_components = &wf_aligner->wf_components; const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; - const int max_score_scope = wf_components->max_score_scope; - // Compute effective hi/lo dimensions (after padding to avoid compute-kernel peeling) - const int effective_lo = lo - (max_score_scope+1); - const int effective_hi = hi + (max_score_scope+1); - const int padded_lo = MIN(effective_lo,wf_components->historic_min_lo); - const int padded_hi = MAX(effective_hi,wf_components->historic_max_hi); - wf_components->historic_min_lo = padded_lo; - wf_components->historic_max_hi = padded_hi; + // Consider ends-free (M!=0) + int effective_lo, effective_hi; + if (wavefront_compute_endsfree_required(wf_aligner,score)) { + int endsfree_lo, endsfree_hi; + wavefront_compute_endsfree_limits(wf_aligner,score,&endsfree_lo,&endsfree_hi); + effective_lo = MIN(lo,endsfree_lo); + effective_hi = MAX(hi,endsfree_hi); + } else { + effective_lo = lo; + effective_hi = hi; + } + // Compute effective hi/lo dimensions + wavefront_compute_limits_output( + wf_aligner,effective_lo,effective_hi, + &effective_lo,&effective_hi); // Resize null/victim wavefronts - wavefront_components_resize_null__victim(wf_components,padded_lo,padded_hi); + 
wavefront_components_resize_null__victim(wf_components,effective_lo,effective_hi); // Modular wavefront + int score_mod = score; if (wf_components->memory_modular) { - score = score % wf_components->max_score_scope; - wavefront_compute_free_output(wf_aligner,score); + score_mod = score % wf_components->max_score_scope; + wavefront_compute_free_output(wf_aligner,score_mod); + } + // Check + if (score_mod >= wf_components->num_wavefronts) { + fprintf(stderr,"[WFA::Compute] Maximum allocated wavefronts reached\n"); + exit(1); } // Allocate M-Wavefront - wavefront_set->out_mwavefront = wavefront_slab_allocate(wavefront_slab,padded_lo,padded_hi); - wf_components->mwavefronts[score] = wavefront_set->out_mwavefront; - wf_components->mwavefronts[score]->lo = lo; - wf_components->mwavefronts[score]->hi = hi; + wavefront_set->out_mwavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->mwavefronts[score_mod] = wavefront_set->out_mwavefront; + wf_components->mwavefronts[score_mod]->lo = lo; + wf_components->mwavefronts[score_mod]->hi = hi; if (distance_metric == gap_linear) return; // Allocate I1-Wavefront if (!wavefront_set->in_mwavefront_open1->null || !wavefront_set->in_i1wavefront_ext->null) { - wavefront_set->out_i1wavefront = wavefront_slab_allocate(wavefront_slab,padded_lo,padded_hi); - wf_components->i1wavefronts[score] = wavefront_set->out_i1wavefront; - wf_components->i1wavefronts[score]->lo = lo; - wf_components->i1wavefronts[score]->hi = hi; + wavefront_set->out_i1wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->i1wavefronts[score_mod] = wavefront_set->out_i1wavefront; + wf_components->i1wavefronts[score_mod]->lo = lo; + wf_components->i1wavefronts[score_mod]->hi = hi; } else { wavefront_set->out_i1wavefront = wf_components->wavefront_victim; - wf_components->i1wavefronts[score] = NULL; + wf_components->i1wavefronts[score_mod] = NULL; } // Allocate D1-Wavefront if 
(!wavefront_set->in_mwavefront_open1->null || !wavefront_set->in_d1wavefront_ext->null) { - wavefront_set->out_d1wavefront = wavefront_slab_allocate(wavefront_slab,padded_lo,padded_hi); - wf_components->d1wavefronts[score] = wavefront_set->out_d1wavefront; - wf_components->d1wavefronts[score]->lo = lo; - wf_components->d1wavefronts[score]->hi = hi; + wavefront_set->out_d1wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->d1wavefronts[score_mod] = wavefront_set->out_d1wavefront; + wf_components->d1wavefronts[score_mod]->lo = lo; + wf_components->d1wavefronts[score_mod]->hi = hi; } else { wavefront_set->out_d1wavefront = wf_components->wavefront_victim; - wf_components->d1wavefronts[score] = NULL; + wf_components->d1wavefronts[score_mod] = NULL; } if (distance_metric == gap_affine) return; // Allocate I2-Wavefront if (!wavefront_set->in_mwavefront_open2->null || !wavefront_set->in_i2wavefront_ext->null) { - wavefront_set->out_i2wavefront = wavefront_slab_allocate(wavefront_slab,padded_lo,padded_hi); - wf_components->i2wavefronts[score] = wavefront_set->out_i2wavefront; - wf_components->i2wavefronts[score]->lo = lo; - wf_components->i2wavefronts[score]->hi = hi; + wavefront_set->out_i2wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->i2wavefronts[score_mod] = wavefront_set->out_i2wavefront; + wf_components->i2wavefronts[score_mod]->lo = lo; + wf_components->i2wavefronts[score_mod]->hi = hi; } else { wavefront_set->out_i2wavefront = wf_components->wavefront_victim; - wf_components->i2wavefronts[score] = NULL; + wf_components->i2wavefronts[score_mod] = NULL; } // Allocate D2-Wavefront if (!wavefront_set->in_mwavefront_open2->null || !wavefront_set->in_d2wavefront_ext->null) { - wavefront_set->out_d2wavefront = wavefront_slab_allocate(wavefront_slab,padded_lo,padded_hi); - wf_components->d2wavefronts[score] = wavefront_set->out_d2wavefront; - wf_components->d2wavefronts[score]->lo 
= lo; - wf_components->d2wavefronts[score]->hi = hi; + wavefront_set->out_d2wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->d2wavefronts[score_mod] = wavefront_set->out_d2wavefront; + wf_components->d2wavefronts[score_mod]->lo = lo; + wf_components->d2wavefronts[score_mod]->hi = hi; } else { wavefront_set->out_d2wavefront = wf_components->wavefront_victim; - wf_components->d2wavefronts[score] = NULL; + wf_components->d2wavefronts[score_mod] = NULL; } } /* @@ -372,8 +572,9 @@ void wavefront_compute_trim_ends( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; wf_offset_t* const offsets = wavefront->offsets; // Trim from hi int k; @@ -400,12 +601,18 @@ void wavefront_compute_trim_ends( } wavefront->lo = k; // Set new lo wavefront->wf_elements_init_min = k; + wavefront->null = (wavefront->lo > wavefront->hi); } -void wavefront_compute_trim_ends_set( +void wavefront_compute_process_ends( wavefront_aligner_t* const wf_aligner, - wavefront_set_t* const wavefront_set) { + wavefront_set_t* const wavefront_set, + const int score) { // Parameters const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + // Consider ends-free (M!=0) + if (wavefront_compute_endsfree_required(wf_aligner,score)) { + wavefront_compute_endsfree_init(wf_aligner,wavefront_set->out_mwavefront,score); + } // Trim ends from non-null WFs if (wavefront_set->out_mwavefront) wavefront_compute_trim_ends(wf_aligner,wavefront_set->out_mwavefront); if (distance_metric == gap_linear) return; diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute.h b/pywfa/WFA2_lib/wavefront/wavefront_compute.h index 12fad79..add81a6 100644 --- 
a/pywfa/WFA2_lib/wavefront/wavefront_compute.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute.h @@ -37,11 +37,26 @@ /* * Compute limits */ -void wavefront_compute_limits( +void wavefront_compute_limits_input( wavefront_aligner_t* const wf_aligner, const wavefront_set_t* const wavefront_set, int* const lo, int* const hi); +void wavefront_compute_limits_output( + wavefront_aligner_t* const wf_aligner, + const int lo, + const int hi, + int* const effective_lo, + int* const effective_hi); + +/* + * Score translation + */ +int wavefront_compute_classic_score( + wavefront_aligner_t* const wf_aligner, + const int pattern_length, + const int text_length, + const int wf_score); /* * Input wavefronts (fetch) @@ -56,11 +71,11 @@ void wavefront_compute_fetch_input( */ void wavefront_compute_allocate_output_null( wavefront_aligner_t* const wf_aligner, - int score); + const int score); void wavefront_compute_allocate_output( wavefront_aligner_t* const wf_aligner, wavefront_set_t* const wavefront_set, - int score, + const int score, const int lo, const int hi); @@ -74,14 +89,15 @@ void wavefront_compute_init_ends( const int hi); /* - * Trim wavefronts ends + * Process wavefronts ends */ void wavefront_compute_trim_ends( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront); -void wavefront_compute_trim_ends_set( +void wavefront_compute_process_ends( wavefront_aligner_t* const wf_aligner, - wavefront_set_t* const wavefront_set); + wavefront_set_t* const wavefront_set, + const int score); /* * Multithread dispatcher diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.c b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.c index ea4b464..c093a4e 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-affine) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include 
"system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -46,8 +47,9 @@ void wavefront_compute_affine_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -91,8 +93,9 @@ void wavefront_compute_affine_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -187,39 +190,23 @@ void wavefront_compute_affine_idm_piggyback( } } /* - * Compute next wavefront + * Compute Wavefronts (gap-affine) */ -void wavefront_compute_affine( +void wavefront_compute_affine_dispatcher( wavefront_aligner_t* const wf_aligner, - const int score) { - // Select wavefronts - wavefront_set_t wavefront_set; - wavefront_compute_fetch_input(wf_aligner,&wavefront_set,score); - // Check null wavefronts - if (wavefront_set.in_mwavefront_misms->null && - wavefront_set.in_mwavefront_open1->null && - wavefront_set.in_i1wavefront_ext->null && - wavefront_set.in_d1wavefront_ext->null) { - wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront - return; - } + wavefront_set_t* const wavefront_set, + const int lo, + const int hi) { // Parameters 
const bool bt_piggyback = wf_aligner->wf_components.bt_piggyback; - int hi, lo; - // Set limits - wavefront_compute_limits(wf_aligner,&wavefront_set,&lo,&hi); - // Allocate wavefronts - wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); - // Init wavefront ends - wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Multithreading dispatcher const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + // Multithreading dispatcher if (num_threads == 1) { // Compute next wavefront if (bt_piggyback) { - wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + wavefront_compute_affine_idm_piggyback(wf_aligner,wavefront_set,lo,hi); } else { - wavefront_compute_affine_idm(wf_aligner,&wavefront_set,lo,hi); + wavefront_compute_affine_idm(wf_aligner,wavefront_set,lo,hi); } } else { #ifdef WFA_PARALLEL @@ -227,22 +214,49 @@ void wavefront_compute_affine( #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + const int thread_id = omp_get_thread_num(); + const int thread_num = omp_get_num_threads(); + wavefront_compute_thread_limits(thread_id,thread_num,lo,hi,&t_lo,&t_hi); if (bt_piggyback) { - wavefront_compute_affine_idm_piggyback(wf_aligner,&wavefront_set,t_lo,t_hi); + wavefront_compute_affine_idm_piggyback(wf_aligner,wavefront_set,t_lo,t_hi); } else { - wavefront_compute_affine_idm(wf_aligner,&wavefront_set,t_lo,t_hi); + wavefront_compute_affine_idm(wf_aligner,wavefront_set,t_lo,t_hi); } } #endif } +} +void wavefront_compute_affine( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Select wavefronts + wavefront_set_t wavefront_set; + wavefront_compute_fetch_input(wf_aligner,&wavefront_set,score); + // Check null wavefronts + if (wavefront_set.in_mwavefront_misms->null && + wavefront_set.in_mwavefront_open1->null && + wavefront_set.in_i1wavefront_ext->null && + 
wavefront_set.in_d1wavefront_ext->null) { + wf_aligner->align_status.num_null_steps++; // Increment null-steps + wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront + return; + } + wf_aligner->align_status.num_null_steps = 0; + // Set limits + int hi, lo; + wavefront_compute_limits_input(wf_aligner,&wavefront_set,&lo,&hi); + // Allocate wavefronts + wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); + // Init wavefront ends + wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); + // Compute wavefronts + wavefront_compute_affine_dispatcher(wf_aligner,&wavefront_set,lo,hi); // Offload backtrace (if necessary) - if (bt_piggyback) { + if (wf_aligner->wf_components.bt_piggyback) { wavefront_backtrace_offload_affine(wf_aligner,&wavefront_set,lo,hi); } - // Trim wavefront ends - wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); + // Process wavefront ends + wavefront_compute_process_ends(wf_aligner,&wavefront_set,score); } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.h b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.h index 251b712..652d8ed 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine.h @@ -49,7 +49,7 @@ void wavefront_compute_affine_idm_piggyback( const int hi); /* - * Compute wavefront (gap-affine) + * Compute Wavefronts (gap-affine) */ void wavefront_compute_affine( wavefront_aligner_t* const wf_aligner, diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.c b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.c index 82eaf9d..14b4bb1 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-affine-2p) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" 
#include "wavefront_compute_affine.h" #include "wavefront_backtrace_offload.h" @@ -47,8 +48,9 @@ void wavefront_compute_affine2p_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -111,8 +113,9 @@ void wavefront_compute_affine2p_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -278,7 +281,7 @@ void wavefront_compute_affine2p_idm_piggyback( } } /* - * Compute next wavefront + * Compute wavefronts */ void wavefront_compute_affine2p_dispatcher( wavefront_aligner_t* const wf_aligner, @@ -303,6 +306,31 @@ void wavefront_compute_affine2p_dispatcher( } } } +void wavefront_compute_affine2p_dispatcher_omp( + wavefront_aligner_t* const wf_aligner, + wavefront_set_t* const wavefront_set, + const int lo, + const int hi) { + // Parameters + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + // Multithreading dispatcher + if (num_threads == 1) { + // Compute next wavefront + wavefront_compute_affine2p_dispatcher(wf_aligner,wavefront_set,lo,hi); + } else { +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel 
num_threads(num_threads) + { + int t_lo, t_hi; + const int thread_id = omp_get_thread_num(); + const int thread_num = omp_get_num_threads(); + wavefront_compute_thread_limits(thread_id,thread_num,lo,hi,&t_lo,&t_hi); + wavefront_compute_affine2p_dispatcher(wf_aligner,wavefront_set,t_lo,t_hi); + } +#endif + } +} void wavefront_compute_affine2p( wavefront_aligner_t* const wf_aligner, const int score) { @@ -317,38 +345,25 @@ void wavefront_compute_affine2p( wavefront_set.in_i2wavefront_ext->null && wavefront_set.in_d1wavefront_ext->null && wavefront_set.in_d2wavefront_ext->null) { + wf_aligner->align_status.num_null_steps++; // Increment null-steps wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront return; } + wf_aligner->align_status.num_null_steps = 0; // Set limits int hi, lo; - wavefront_compute_limits(wf_aligner,&wavefront_set,&lo,&hi); + wavefront_compute_limits_input(wf_aligner,&wavefront_set,&lo,&hi); // Allocate wavefronts wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); // Init wavefront ends wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Multithreading dispatcher - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Compute next wavefront - wavefront_compute_affine2p_dispatcher(wf_aligner,&wavefront_set,lo,hi); - } else { -#ifdef WFA_PARALLEL - // Compute next wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wavefront_compute_affine2p_dispatcher(wf_aligner,&wavefront_set,t_lo,t_hi); - } -#endif - } + // Compute wavefronts + wavefront_compute_affine2p_dispatcher_omp(wf_aligner,&wavefront_set,lo,hi); // Offload backtrace (if necessary) if (wf_aligner->wf_components.bt_piggyback) { wavefront_backtrace_offload_affine(wf_aligner,&wavefront_set,lo,hi); } - // Trim wavefront ends - 
wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); + // Process wavefront ends + wavefront_compute_process_ends(wf_aligner,&wavefront_set,score); } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.h b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.h index 9294521..fbb64df 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_affine2p.h @@ -35,7 +35,7 @@ #include "wavefront_aligner.h" /* - * Compute wavefront (gap-affine-2p) + * Compute Wavefronts (gap-affine-2p) */ void wavefront_compute_affine2p( wavefront_aligner_t* const wf_aligner, diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_edit.c b/pywfa/WFA2_lib/wavefront/wavefront_compute_edit.c index 46561b2..13202a0 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_edit.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_edit.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (edit/indel) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -47,8 +48,9 @@ void wavefront_compute_indel_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; // Compute-Next kernel loop @@ -74,8 +76,9 @@ void wavefront_compute_edit_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = 
sequences->pattern_length; + const int text_length = sequences->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; // Compute-Next kernel loop @@ -106,8 +109,9 @@ void wavefront_compute_indel_idm_piggyback( const int hi, const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Previous WF const wf_offset_t* const prev_offsets = wf_prev->offsets; const pcigar_t* const prev_pcigar = wf_prev->bt_pcigar; @@ -148,8 +152,9 @@ void wavefront_compute_edit_idm_piggyback( const int hi, const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Previous WF const wf_offset_t* const prev_offsets = wf_prev->offsets; const pcigar_t* const prev_pcigar = wf_prev->bt_pcigar; @@ -215,8 +220,9 @@ void wavefront_compute_edit_exact_prune( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront) { // Parameters - const int plen = wf_aligner->pattern_length; - const int tlen = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int plen = sequences->pattern_length; + const int tlen = sequences->text_length; wf_offset_t* const offsets = wavefront->offsets; const int lo = wavefront->lo; const int hi = wavefront->hi; @@ -291,6 +297,35 @@ void wavefront_compute_edit_dispatcher( } } } +void wavefront_compute_edit_dispatcher_omp( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const wf_prev, + wavefront_t* const wf_curr, + const int lo, + const int 
hi, + const int score) { + // Parameters + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + // Multithreading dispatcher + if (num_threads == 1) { + // Compute next wavefront + wavefront_compute_edit_dispatcher( + wf_aligner,score,wf_prev,wf_curr,lo,hi); + } else { +#ifdef WFA_PARALLEL + // Compute next wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + const int thread_id = omp_get_thread_num(); + const int thread_num = omp_get_num_threads(); + wavefront_compute_thread_limits(thread_id,thread_num,lo,hi,&t_lo,&t_hi); + wavefront_compute_edit_dispatcher( + wf_aligner,score,wf_prev,wf_curr,t_lo,t_hi); + } +#endif + } +} void wavefront_compute_edit( wavefront_aligner_t* const wf_aligner, const int score) { @@ -321,25 +356,8 @@ void wavefront_compute_edit( wf_components->mwavefronts[score_curr] = wf_curr; wf_components->mwavefronts[score_curr]->lo = lo; wf_components->mwavefronts[score_curr]->hi = hi; - // Multithreading dispatcher - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Compute next wavefront - wavefront_compute_edit_dispatcher( - wf_aligner,score,wf_prev,wf_curr,lo,hi); - } else { -#ifdef WFA_PARALLEL - // Compute next wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wavefront_compute_edit_dispatcher( - wf_aligner,score,wf_prev,wf_curr,t_lo,t_hi); - } -#endif - } + // Compute Wavefront + wavefront_compute_edit_dispatcher_omp(wf_aligner,wf_prev,wf_curr,lo,hi,score); // Offload backtrace (if necessary) if (wf_components->bt_piggyback && score % PCIGAR_MAX_LENGTH == 0) { wavefront_backtrace_offload_blocks_linear( @@ -347,11 +365,10 @@ void wavefront_compute_edit( } // Trim wavefront ends wavefront_compute_trim_ends(wf_aligner,wf_curr); + if (wf_curr->null) wf_aligner->align_status.num_null_steps 
= INT_MAX; // Exact pruning paths if (wf_aligner->alignment_form.span == alignment_end2end && wf_aligner->penalties.distance_metric == edit) { wavefront_compute_edit_exact_prune(wf_aligner,wf_curr); } } - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_compute_linear.c b/pywfa/WFA2_lib/wavefront/wavefront_compute_linear.c index 1f290e6..d9c1aa7 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_compute_linear.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_compute_linear.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-linear) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -46,8 +47,9 @@ void wavefront_compute_linear_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -79,8 +81,9 @@ void wavefront_compute_linear_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In M const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const pcigar_t* const m_misms_bt_pcigar = wavefront_set->in_mwavefront_misms->bt_pcigar; @@ -124,37 +127,23 @@ void wavefront_compute_linear_idm_piggyback( } } /* - * Compute next wavefront + * Compute Wavefronts (gap-linear) */ 
-void wavefront_compute_linear( +void wavefront_compute_linear_dispatcher( wavefront_aligner_t* const wf_aligner, - const int score) { - // Select wavefronts - wavefront_set_t wavefront_set; - wavefront_compute_fetch_input(wf_aligner,&wavefront_set,score); - // Check null wavefronts - if (wavefront_set.in_mwavefront_misms->null && - wavefront_set.in_mwavefront_open1->null) { - wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront - return; - } + wavefront_set_t* const wavefront_set, + const int lo, + const int hi) { // Parameters const bool bt_piggyback = wf_aligner->wf_components.bt_piggyback; - int hi, lo; - // Set limits - wavefront_compute_limits(wf_aligner,&wavefront_set,&lo,&hi); - // Allocate wavefronts - wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); - // Init wavefront ends - wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); - // Multithreading dispatcher const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + // Multithreading dispatcher if (num_threads == 1) { // Compute next wavefront if (bt_piggyback) { - wavefront_compute_linear_idm_piggyback(wf_aligner,&wavefront_set,lo,hi); + wavefront_compute_linear_idm_piggyback(wf_aligner,wavefront_set,lo,hi); } else { - wavefront_compute_linear_idm(wf_aligner,&wavefront_set,lo,hi); + wavefront_compute_linear_idm(wf_aligner,wavefront_set,lo,hi); } } else { #ifdef WFA_PARALLEL @@ -162,22 +151,47 @@ void wavefront_compute_linear( #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + const int thread_id = omp_get_thread_num(); + const int thread_num = omp_get_num_threads(); + wavefront_compute_thread_limits(thread_id,thread_num,lo,hi,&t_lo,&t_hi); if (bt_piggyback) { - wavefront_compute_linear_idm_piggyback(wf_aligner,&wavefront_set,t_lo,t_hi); + wavefront_compute_linear_idm_piggyback(wf_aligner,wavefront_set,t_lo,t_hi); } 
else { - wavefront_compute_linear_idm(wf_aligner,&wavefront_set,t_lo,t_hi); + wavefront_compute_linear_idm(wf_aligner,wavefront_set,t_lo,t_hi); } } #endif } +} +void wavefront_compute_linear( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Select wavefronts + wavefront_set_t wavefront_set; + wavefront_compute_fetch_input(wf_aligner,&wavefront_set,score); + // Check null wavefronts + if (wavefront_set.in_mwavefront_misms->null && + wavefront_set.in_mwavefront_open1->null) { + wf_aligner->align_status.num_null_steps++; // Increment null-steps + wavefront_compute_allocate_output_null(wf_aligner,score); // Null s-wavefront + return; + } + wf_aligner->align_status.num_null_steps = 0; + // Set limits + int hi, lo; + wavefront_compute_limits_input(wf_aligner,&wavefront_set,&lo,&hi); + // Allocate wavefronts + wavefront_compute_allocate_output(wf_aligner,&wavefront_set,score,lo,hi); + // Init wavefront ends + wavefront_compute_init_ends(wf_aligner,&wavefront_set,lo,hi); + // Compute Wavefronts + wavefront_compute_linear_dispatcher(wf_aligner,&wavefront_set,lo,hi); // Offload backtrace (if necessary) - if (bt_piggyback) { + if (wf_aligner->wf_components.bt_piggyback) { wavefront_backtrace_offload_linear(wf_aligner,&wavefront_set,lo,hi); } - // Trim wavefront ends - wavefront_compute_trim_ends_set(wf_aligner,&wavefront_set); + // Process wavefront ends + wavefront_compute_process_ends(wf_aligner,&wavefront_set,score); } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_debug.c b/pywfa/WFA2_lib/wavefront/wavefront_debug.c index 613b175..08497ab 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_debug.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_debug.c @@ -32,6 +32,7 @@ #include "utils/commons.h" #include "wavefront_debug.h" #include "wavefront_align.h" +#include "wavefront_compute.h" /* * Checks @@ -40,15 +41,14 @@ bool wavefront_check_alignment( FILE* const stream, wavefront_aligner_t* const wf_aligner) { // Parameters - const char* const pattern = 
wf_aligner->pattern; - const int pattern_length = wf_aligner->pattern_length; - const char* const text = wf_aligner->text; - const int text_length = wf_aligner->text_length; - // Custom function to compare sequences - alignment_match_funct_t match_funct = wf_aligner->match_funct; - void* match_funct_arguments = wf_aligner->match_funct_arguments; + wavefront_sequences_t* const sequences = (wf_aligner->bialigner==NULL) ? + &wf_aligner->sequences : &wf_aligner->bialigner->wf_forward->sequences; + const char* const pattern = sequences->pattern_buffer; + const int pattern_length = sequences->pattern_buffer_length; + const char* const text = sequences->text_buffer; + const int text_length = sequences->text_buffer_length; // CIGAR - cigar_t* const cigar = &wf_aligner->cigar; + cigar_t* const cigar = wf_aligner->cigar; char* const operations = cigar->operations; const int begin_offset = cigar->begin_offset; const int end_offset = cigar->end_offset; @@ -59,14 +59,15 @@ bool wavefront_check_alignment( switch (operations[i]) { case 'M': { // Check match - const bool is_match = (match_funct!=NULL) ? - match_funct(pattern_pos,text_pos,match_funct_arguments) : - pattern[pattern_pos] == text[text_pos]; - if (!is_match) { - fprintf(stream,"[WFA::Check] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", - pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); - alignment_correct = false; - break; + if (sequences->mode != wf_sequences_lambda) { + const bool is_match = (pattern[pattern_pos]==text[text_pos]); + if (!is_match) { + fprintf(stream,"[WFA::Check] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", + pattern_pos,pattern[pattern_pos], + text_pos,text[text_pos]); + alignment_correct = false; + break; + } } ++pattern_pos; ++text_pos; @@ -74,14 +75,15 @@ bool wavefront_check_alignment( } case 'X': { // Check mismatch - const bool is_match = (match_funct!=NULL) ? 
- match_funct(pattern_pos,text_pos,match_funct_arguments) : - pattern[pattern_pos] == text[text_pos]; - if (is_match) { - fprintf(stream,"[WFA::Check] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", - pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); - alignment_correct = false; - break; + if (sequences->mode != wf_sequences_lambda) { + const bool is_match = (pattern[pattern_pos]==text[text_pos]); + if (is_match) { + fprintf(stream,"[WFA::Check] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", + pattern_pos,pattern[pattern_pos], + text_pos,text[text_pos]); + alignment_correct = false; + break; + } } ++pattern_pos; ++text_pos; @@ -120,126 +122,119 @@ bool wavefront_check_alignment( */ void wavefront_report_lite( FILE* const stream, - wavefront_aligner_t* const wf_aligner) { + wavefront_aligner_t* const wf_aligner, + const bool alignment_completed) { // Parameters - const char* const pattern = wf_aligner->pattern; - const int pattern_length = wf_aligner->pattern_length; - const char* const text = wf_aligner->text; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = (wf_aligner->bialigner==NULL) ? + &wf_aligner->sequences : &wf_aligner->bialigner->wf_forward->sequences; + const char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + const char* const text = sequences->text; + const int text_length = sequences->text_length; const int status = wf_aligner->align_status.status; const uint64_t memory_used = wf_aligner->align_status.memory_used; - // Banner - fprintf(stream,"[WFA::Debug]"); - // Sequences - fprintf(stream,"\t%d",-wf_aligner->cigar.score); - fprintf(stream,"\t%d\t%d",pattern_length,text_length); - fprintf(stream,"\t%s",(status==0) ? 
"OK" : "FAIL"); - fprintf(stream,"\t%2.3f",TIMER_GET_TOTAL_MS(&wf_aligner->system.timer)); - fprintf(stream,"\t%luMB\t",CONVERT_B_TO_MB(memory_used)); - cigar_print(stream,&wf_aligner->cigar,true); - if (wf_aligner->match_funct != NULL) { - fprintf(stream,"\t-\t-"); + const bool has_cigar = alignment_completed && !cigar_is_null(wf_aligner->cigar); + // BANNER (#0) + if (alignment_completed) { + fprintf(stream,"[WFA::Debug]"); } else { - fprintf(stream,"\t%.*s\t%.*s",pattern_length,pattern,text_length,text); + fprintf(stream,"[WFA::Debug::BEGIN]"); } - fprintf(stream,"\n"); -} -void wavefront_report_verbose_begin( - FILE* const stream, - wavefront_aligner_t* const wf_aligner) { - // Parameters - const char* const pattern = wf_aligner->pattern; - const int pattern_length = wf_aligner->pattern_length; - const char* const text = wf_aligner->text; - const int text_length = wf_aligner->text_length; - // Input sequences - fprintf(stream,"[WFA::Debug] WFA-Alignment (obj=%p)\n",wf_aligner); - if (wf_aligner->match_funct != NULL) { - fprintf(stream,"[WFA::Debug]\tPattern\t%d\tcustom-funct()\n",pattern_length); - fprintf(stream,"[WFA::Debug]\tText\t%d\tcustom-funct()\n",text_length); + // SCORE (#1) + // const int score = wavefront_compute_classic_score( + // wf_aligner,pattern_length,text_length,wf_aligner->cigar->score); + const int score = wf_aligner->cigar->score; + if (alignment_completed && score!=INT32_MIN) { + fprintf(stream,"\t%d",score); + if (has_cigar) { + const int edit_dist = cigar_score_edit(wf_aligner->cigar); + fprintf(stream,"/%1.2f",(float)edit_dist/(float)MAX(pattern_length,text_length)); + } } else { - fprintf(stream,"[WFA::Debug]\tPattern\t%d\t%.*s\n",pattern_length,pattern_length,pattern); - fprintf(stream,"[WFA::Debug]\tText\t%d\t%.*s\n",text_length,text_length,text); + fprintf(stream,"\t*"); } - // Alignment scope/form - fprintf(stream,"[WFA::Debug]\tScope\t%s\n", - (wf_aligner->alignment_scope == compute_score) ? 
"score" : "alignment"); - if (wf_aligner->alignment_form.span == alignment_end2end) { - fprintf(stream,"[WFA::Debug]\tForm\t(end2end)\n"); + // PATTERN_LENGTH (#2) + fprintf(stream,"\t%d",pattern_length); + // TEXT_LENGTH (#3) + fprintf(stream,"\t%d",text_length); + // STATUS (#4) + fprintf(stream,"\t%s",wavefront_align_strerror_short(status)); + // TIME (#5) + if (alignment_completed) { + fprintf(stream,"\t%2.3f",TIMER_GET_TOTAL_MS(&wf_aligner->system.timer)); } else { - fprintf(stream,"[WFA::Debug]\tForm\t(endsfree,%d,%d,%d,%d)\n", - wf_aligner->alignment_form.pattern_begin_free, - wf_aligner->alignment_form.pattern_end_free, - wf_aligner->alignment_form.text_begin_free, - wf_aligner->alignment_form.text_end_free); + fprintf(stream,"\t-"); } - fprintf(stream,"[WFA::Debug]\tMax-score\t%d\n", - wf_aligner->system.max_alignment_score); - // Penalties - fprintf(stream,"[WFA::Debug]\tPenalties\t"); - wavefronts_penalties_print(stream,&wf_aligner->penalties); - fprintf(stream,"\n"); - // Heuristic - fprintf(stream,"[WFA::Debug]\tHeuristic\t"); + // MEMORY (#6) + if (alignment_completed) { + fprintf(stream,"\t%luMB\t",CONVERT_B_TO_MB(memory_used)); + } else { + fprintf(stream,"\t-\t"); + } + // ATTRIBUTES (#7) + fprintf(stream,"["); + wavefront_aligner_print_mode(stream,wf_aligner); + fprintf(stream,";"); + wavefront_aligner_print_scope(stream,wf_aligner); + fprintf(stream,";"); + wavefront_penalties_print(stream,&wf_aligner->penalties); + fprintf(stream,";"); + wavefront_aligner_print_conf(stream,wf_aligner); + fprintf(stream,";"); wavefront_heuristic_print(stream,&wf_aligner->heuristic); - fprintf(stream,"\n"); - // Memory mode - fprintf(stream,"[WFA::Debug]\tMemory.mode\t(%d,%luMB,%luMB,%luMB)\n", - wf_aligner->memory_mode, - CONVERT_B_TO_MB(wf_aligner->system.max_memory_compact), - CONVERT_B_TO_MB(wf_aligner->system.max_memory_resident), - CONVERT_B_TO_MB(wf_aligner->system.max_memory_abort)); -} -void wavefront_report_verbose_end( - FILE* const stream, - 
wavefront_aligner_t* const wf_aligner) { - // Finish report - fprintf(stream,"[WFA::Debug]\tFinish.status\t%d\n",wf_aligner->align_status.status); - fprintf(stream,"[WFA::Debug]\tTime.taken\t"); - timer_print_total(stream,&wf_aligner->system.timer); - fprintf(stream,"\n"); - fprintf(stream,"[WFA::Debug]\tMemory.used\t%luMB\n", - CONVERT_B_TO_MB(wf_aligner->align_status.memory_used)); - fprintf(stream,"[WFA::Debug]\tWFA.score\t%d\n",-(wf_aligner->cigar.score)); - fprintf(stream,"[WFA::Debug]\tWFA.cigar\t"); - cigar_print(stream,&wf_aligner->cigar,true); - fprintf(stream,"\n"); - fprintf(stream,"[WFA::Debug]\tWFA.components (wfs=%d,maxlo=%d,maxhi=%d)\n", + fprintf(stream,";"); + fprintf(stream,"(%d,%d,%d)", wf_aligner->wf_components.num_wavefronts, wf_aligner->wf_components.historic_min_lo, wf_aligner->wf_components.historic_max_hi); + fprintf(stream,"]\t"); + // CIGAR (#8) + if (!has_cigar) { + fprintf(stream,"-"); + } else { + cigar_print(stream,wf_aligner->cigar,true); + } + // SEQUENCES (#9 #10) + if (sequences->mode == wf_sequences_lambda) { + fprintf(stream,"\t-\t-"); + } else { + fprintf(stream,"\t%.*s\t%.*s",pattern_length,pattern,text_length,text); + } + fprintf(stream,"\n"); } /* * Debug */ -void wavefront_debug_prologue( +void wavefront_debug_begin( wavefront_aligner_t* const wf_aligner) { // Check verbose level - if (wf_aligner->system.verbose >= 2) { + if (wf_aligner->system.verbose >= 1) { + timer_reset(&wf_aligner->system.timer); timer_start(&wf_aligner->system.timer); - if (wf_aligner->system.verbose > 2) { - wavefront_report_verbose_begin(stderr,wf_aligner); + if (wf_aligner->system.verbose >= 4) { + wavefront_report_lite(stderr,wf_aligner,false); } } } -void wavefront_debug_epilogue( +void wavefront_debug_end( wavefront_aligner_t* const wf_aligner) { // Print Summary - if (wf_aligner->system.verbose >= 2) { + if (wf_aligner->system.verbose >= 1) { timer_stop(&wf_aligner->system.timer); - if (wf_aligner->system.verbose == 2) { - 
wavefront_report_lite(stderr,wf_aligner); - } else { - wavefront_report_verbose_end(stderr,wf_aligner); - } + wavefront_report_lite(stderr,wf_aligner,true); } +} +/* + * Check + */ +void wavefront_debug_check_correct( + wavefront_aligner_t* const wf_aligner) { // Check correct if (wf_aligner->system.check_alignment_correct && - wf_aligner->align_status.status == WF_STATUS_SUCCESSFUL && + wf_aligner->align_status.status == WF_STATUS_ALG_COMPLETED && wf_aligner->alignment_scope == compute_alignment) { if (!wavefront_check_alignment(stderr,wf_aligner)) { - fprintf(stderr,"[WFA::Check] Alignment incorrect\n"); - wavefront_report_verbose_end(stderr,wf_aligner); + fprintf(stderr,"[WFA::Check] Error: Alignment incorrect\n"); exit(1); } } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_debug.h b/pywfa/WFA2_lib/wavefront/wavefront_debug.h index a72f6f9..828343a 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_debug.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_debug.h @@ -37,9 +37,15 @@ /* * Debug */ -void wavefront_debug_prologue( +void wavefront_debug_begin( wavefront_aligner_t* const wf_aligner); -void wavefront_debug_epilogue( +void wavefront_debug_end( + wavefront_aligner_t* const wf_aligner); + +/* + * Check + */ +void wavefront_debug_check_correct( wavefront_aligner_t* const wf_aligner); #endif /* WAVEFRONT_DEBUG_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_display.c b/pywfa/WFA2_lib/wavefront/wavefront_display.c index e7bc747..a735686 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_display.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_display.c @@ -29,6 +29,8 @@ * DESCRIPTION: WaveFront-Alignment module for display and report */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_display.h" #include "wavefront_aligner.h" #include "wavefront_compute.h" diff --git a/pywfa/WFA2_lib/wavefront/wavefront_display.h b/pywfa/WFA2_lib/wavefront/wavefront_display.h index ab0c092..38c65ef 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_display.h 
+++ b/pywfa/WFA2_lib/wavefront/wavefront_display.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_DISPLAY_H_ #define WAVEFRONT_DISPLAY_H_ -#include "utils/commons.h" - // Wavefront ahead definition typedef struct _wavefront_aligner_t wavefront_aligner_t; diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend.c b/pywfa/WFA2_lib/wavefront/wavefront_extend.c index 95a7e6e..1ba7886 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_extend.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend.c @@ -26,463 +26,272 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + * DESCRIPTION: WFA module for the "extension" of exact matches */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_extend.h" -#include "wavefront_align.h" +#include "wavefront_extend_kernels.h" +#include "wavefront_extend_kernels_avx.h" #include "wavefront_compute.h" -#include "wavefront_heuristic.h" +#include "wavefront_termination.h" #ifdef WFA_PARALLEL #include #endif /* - * Termination (detect end of alignment) + * Wavefront Extension (End-to-end) */ -bool wavefront_extend_end2end_check_termination( +void wavefront_extend_end2end_dispatcher_seq( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, const int score, - const int score_mod) { + const int lo, + const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const affine2p_matrix_type component_end = wf_aligner->component_end; - const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); - const wf_offset_t alignment_offset = DPMATRIX_OFFSET(text_length,pattern_length); - // Select end component - switch (component_end) { - case affine2p_matrix_M: { - // Check diagonal/offset - if (mwavefront->lo > alignment_k || alignment_k > mwavefront->hi) return false; // Not done - const 
wf_offset_t moffset = mwavefront->offsets[alignment_k]; - if (moffset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_I1: { - // Fetch I1-wavefront & check diagonal/offset - wavefront_t* const i1wavefront = wf_aligner->wf_components.i1wavefronts[score_mod]; - if (i1wavefront == NULL || i1wavefront->lo > alignment_k || alignment_k > i1wavefront->hi) return false; // Not done - const wf_offset_t i1offset = i1wavefront->offsets[alignment_k]; - if (i1offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_I2: { - // Fetch I2-wavefront & check diagonal/offset - wavefront_t* const i2wavefront = wf_aligner->wf_components.i2wavefronts[score_mod]; - if (i2wavefront == NULL || i2wavefront->lo > alignment_k || alignment_k > i2wavefront->hi) return false; // Not done - const wf_offset_t i2offset = i2wavefront->offsets[alignment_k]; - if (i2offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_D1: { - // Fetch D1-wavefront & check diagonal/offset - wavefront_t* const d1wavefront = wf_aligner->wf_components.d1wavefronts[score_mod]; - if (d1wavefront == NULL || d1wavefront->lo > alignment_k || alignment_k > d1wavefront->hi) return false; // Not done - const wf_offset_t d1offset = d1wavefront->offsets[alignment_k]; - if (d1offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - 
wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_D2: { - // Fetch D2-wavefront & check diagonal/offset - wavefront_t* const d2wavefront = wf_aligner->wf_components.d2wavefronts[score_mod]; - if (d2wavefront == NULL || d2wavefront->lo > alignment_k || alignment_k > d2wavefront->hi) return false; // Not done - const wf_offset_t d2offset = d2wavefront->offsets[alignment_k]; - if (d2offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - default: - break; + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if (seqs->mode == wf_sequences_ascii) { +//#if __AVX2__ // TODO +// wavefront_extend_matches_packed_end2end_avx2(wf_aligner,mwavefront,lo,hi); +//#else + wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,lo,hi); +//#endif + } else { + wf_offset_t dummy; + wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,false,&dummy); } - return false; } -bool wavefront_extend_endsfree_check_termination( +void wavefront_extend_end2end_dispatcher_threads( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, - const int score, - const int k, - const wf_offset_t offset) { + const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - // Check ends-free reaching boundaries - const int h_pos = WAVEFRONT_H(k,offset); - const int v_pos = WAVEFRONT_V(k,offset); - if (h_pos >= text_length) { // Text is aligned - // Is Pattern end-free? 
- const int pattern_left = pattern_length - v_pos; - const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; - if (pattern_left <= pattern_end_free) { - #ifdef WFA_PARALLEL - #pragma omp critical - #endif - { - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = k; - wf_aligner->alignment_end_pos.offset = offset; - } - return true; // Quit (we are done) - } - } - if (v_pos >= pattern_length) { // Pattern is aligned - // Is text end-free? - const int text_left = text_length - h_pos; - const int text_end_free = wf_aligner->alignment_form.text_end_free; - if (text_left <= text_end_free) { - #ifdef WFA_PARALLEL - #pragma omp critical - #endif - { - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = k; - wf_aligner->alignment_end_pos.offset = offset; - } - return true; // Quit (we are done) + const int lo = mwavefront->lo; + const int hi = mwavefront->hi; + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Extend wavefront single-thread + wavefront_extend_end2end_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); + } else { +#ifdef WFA_PARALLEL + // Extend wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wavefront_extend_end2end_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi); } +#endif } - // Not done - return false; } -/* - * Extend kernel - */ -FORCE_INLINE wf_offset_t wavefront_extend_matches_packed_kernel( +int wavefront_extend_end2end( wavefront_aligner_t* const wf_aligner, - const int k, - wf_offset_t offset) { - // Fetch pattern/text blocks - uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->pattern+WAVEFRONT_V(k,offset)); - uint64_t* text_blocks = (uint64_t*)(wf_aligner->text+WAVEFRONT_H(k,offset)); - // Compare 64-bits blocks - uint64_t cmp = *pattern_blocks ^ *text_blocks; - while 
(__builtin_expect(cmp==0,0)) { - // Increment offset (full block) - offset += 8; - // Next blocks - ++pattern_blocks; - ++text_blocks; - // Compare - cmp = *pattern_blocks ^ *text_blocks; + const int score) { + // Compute score + const bool memory_modular = wf_aligner->wf_components.memory_modular; + const int max_score_scope = wf_aligner->wf_components.max_score_scope; + const int score_mod = (memory_modular) ? score % max_score_scope : score; + // Fetch m-wavefront + wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; + if (mwavefront == NULL) { + // Check alignment feasibility (for heuristic variants that can lead to no solution) + if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; + return 1; // Done + } + return 0; // Not done } - // Count equal characters - const int equal_right_bits = __builtin_ctzl(cmp); - const int equal_chars = DIV_FLOOR(equal_right_bits,8); - offset += equal_chars; - // Return extended offset - return offset; -} -/* - * Wavefront offset extension comparing characters - * Remember: - * - No offset is out of boundaries !(h>tlen,v>plen) - * - if (h==tlen,v==plen) extension won't increment (sentinels) - */ -FORCE_NO_INLINE void wavefront_extend_matches_packed_end2end( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int lo, - const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - const wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); + // Extend (dispatcher) + wavefront_extend_end2end_dispatcher_threads(wf_aligner,mwavefront,score); + const bool end_reached = wavefront_termination_end2end(wf_aligner,mwavefront,score,score_mod); + if (end_reached) 
{ + wf_aligner->align_status.status = WF_STATUS_END_REACHED; + wf_aligner->align_status.score = score; + return 1; // Done } -} -FORCE_NO_INLINE wf_offset_t wavefront_extend_matches_packed_max( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int lo, - const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - wf_offset_t max_antidiag = 0; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - const wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); - // Compute max - const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offsets[k]); - if (max_antidiag < antidiag) max_antidiag = antidiag; + // Cut-off wavefront heuristically + if (wf_aligner->heuristic.strategy != wf_heuristic_none) { + if (wavefront_heuristic_cufoff(wf_aligner,score,score_mod)) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; + return 1; // Done + } } - return max_antidiag; + return 0; // Not done } -FORCE_NO_INLINE bool wavefront_extend_matches_packed_endsfree( +/* + * Wavefront Extension (End-to-end + MAX-antidiagonal) + */ +wf_offset_t wavefront_extend_end2end_max_dispatcher_seq( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, const int score, const int lo, const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offset = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); - offsets[k] = offset; - // Check ends-free reaching boundaries - if (wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { - return true; // Quit (we are done) - } + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if 
(seqs->mode == wf_sequences_ascii) { + return wavefront_extend_matches_packed_end2end_max(wf_aligner,mwavefront,lo,hi); + } else { + wf_offset_t max_antidiag; + wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,false,&max_antidiag); + return max_antidiag; } - // Alignment not finished - return false; } -bool wavefront_extend_matches_custom( +wf_offset_t wavefront_extend_end2end_max_dispatcher_threads( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, - const int score, - const int lo, - const int hi, - const bool endsfree) { - // Parameters (custom matching function) - alignment_match_funct_t match_funct = wf_aligner->match_funct; - void* const func_arguments = wf_aligner->match_funct_arguments; - // Extend diagonally each wavefront point - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Check offset - wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Count equal characters - int v = WAVEFRONT_V(k,offset); - int h = WAVEFRONT_H(k,offset); - while (match_funct(v,h,func_arguments)) { - h++; v++; offset++; - } - // Update offset - offsets[k] = offset; - // Check ends-free reaching boundaries - if (endsfree && wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { - return true; // Quit (we are done) - } - } - // Alignment not finished - return false; -} -/* - * Wavefront exact "extension" - */ -int wavefront_extend_end2end_max( - wavefront_aligner_t* const wf_aligner, const int score) { - // Compute score - const bool memory_modular = wf_aligner->wf_components.memory_modular; - const int max_score_scope = wf_aligner->wf_components.max_score_scope; - const int score_mod = (memory_modular) ? 
score % max_score_scope : score; - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront==NULL) return 0; // Not done - // Multithreading dispatcher + // Parameters const int lo = mwavefront->lo; const int hi = mwavefront->hi; wf_offset_t max_antidiag = 0; + // Select number of threads const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); if (num_threads == 1) { - // Extend wavefront - max_antidiag = wavefront_extend_matches_packed_max(wf_aligner,mwavefront,lo,hi); + // Extend wavefront single-thread + max_antidiag = wavefront_extend_end2end_max_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); } else { -#ifdef WFA_PARALLEL // Extend wavefront in parallel +#ifdef WFA_PARALLEL #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wf_offset_t t_max_antidiag = wavefront_extend_matches_packed_max(wf_aligner,mwavefront,t_lo,t_hi); - #ifdef WFA_PARALLEL + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wf_offset_t t_max_antidiag = wavefront_extend_end2end_max_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi); #pragma omp critical - #endif { if (t_max_antidiag > max_antidiag) max_antidiag = t_max_antidiag; } } #endif } - // Cut-off wavefront heuristically - if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - const bool alignment_dropped = wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - if (alignment_dropped) { - wf_aligner->align_status.status = WF_STATUS_HEURISTICALY_DROPPED; - fprintf(stderr,"[WFA:Extend_max] Heuristically dropped error \n"); - exit(-1); - } - } + // Return maximum antidiagonal return max_antidiag; } -int wavefront_extend_end2end( +int wavefront_extend_end2end_max( wavefront_aligner_t* const wf_aligner, - const int score) { + const int score, + int* const max_antidiagonal) { // Compute 
score const bool memory_modular = wf_aligner->wf_components.memory_modular; const int max_score_scope = wf_aligner->wf_components.max_score_scope; const int score_mod = (memory_modular) ? score % max_score_scope : score; + *max_antidiagonal = 0; // Init // Fetch m-wavefront wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront==NULL) return 0; // Not done - // Multithreading dispatcher - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; - bool end_reached = false; - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Extend wavefront - wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,lo,hi); - } else { -#ifdef WFA_PARALLEL - // Extend wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,t_lo,t_hi); + if (mwavefront == NULL) { + // Check alignment feasibility (heuristic variants that can lead to no solution) + if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; + return 1; // Done } -#endif + return 0; // Not done } - // Check end-to-end finished - end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); + // Extend (dispatcher) + const wf_offset_t max_ak = wavefront_extend_end2end_max_dispatcher_threads(wf_aligner,mwavefront,score); + const bool end_reached = wavefront_termination_end2end(wf_aligner,mwavefront,score,score_mod); if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; + wf_aligner->align_status.status = WF_STATUS_END_REACHED; + wf_aligner->align_status.score = score; return 1; // Done } // Cut-off wavefront heuristically if 
(wf_aligner->heuristic.strategy != wf_heuristic_none) { - const bool alignment_dropped = wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - if (alignment_dropped) { - wf_aligner->align_status.status = WF_STATUS_HEURISTICALY_DROPPED; + if (wavefront_heuristic_cufoff(wf_aligner,score,score_mod)) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; return 1; // Done } } + *max_antidiagonal = max_ak; return 0; // Not done } -int wavefront_extend_endsfree( +/* + * Wavefront Extension (Ends-free) + */ +bool wavefront_extend_endsfree_dispatcher_seq( wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if (seqs->mode == wf_sequences_ascii) { + return wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,lo,hi); + } else { + wf_offset_t dummy; + return wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,true,&dummy); + } +} +bool wavefront_extend_endsfree_dispatcher_threads( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, const int score) { - // Modular wavefront - const bool memory_modular = wf_aligner->wf_components.memory_modular; - const int max_score_scope = wf_aligner->wf_components.max_score_scope; - const int score_mod = (memory_modular) ? 
score % max_score_scope : score; - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront==NULL) return 0; // Not done - // Multithreading dispatcher + // Parameters const int lo = mwavefront->lo; const int hi = mwavefront->hi; bool end_reached = false; const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); if (num_threads == 1) { - // Extend wavefront - end_reached = wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,lo,hi); + // Extend wavefront single-thread + end_reached = wavefront_extend_endsfree_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); } else { #ifdef WFA_PARALLEL // Extend wavefront in parallel #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - if (wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,t_lo,t_hi)) { + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (wavefront_extend_endsfree_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi)) { end_reached = true; } } #endif } - if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; - return 1; // Done - } - // Cut-off wavefront heuristically - if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - const bool alignment_dropped = wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - if (alignment_dropped) { - wf_aligner->align_status.status = WF_STATUS_HEURISTICALY_DROPPED; - return 1; // Done - } - } - return 0; // Not done + // Return end-reached + return end_reached; } -int wavefront_extend_custom( +int wavefront_extend_endsfree( wavefront_aligner_t* const wf_aligner, const int score) { - // Compute score + // Modular wavefront const bool memory_modular = wf_aligner->wf_components.memory_modular; const int max_score_scope = wf_aligner->wf_components.max_score_scope; const int 
score_mod = (memory_modular) ? score % max_score_scope : score; // Fetch m-wavefront wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront==NULL) return 0; // Not done - // Multithreading dispatcher - const bool endsfree = (wf_aligner->alignment_form.span == alignment_endsfree); - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; - bool end_reached = false; - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Extend wavefront - end_reached = wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,endsfree); - } else { -#ifdef WFA_PARALLEL - // Extend wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - if (wavefront_extend_matches_custom(wf_aligner,mwavefront,score,t_lo,t_hi,endsfree)) { - end_reached = true; - } + if (mwavefront == NULL) { + // Check alignment feasibility (heuristic variants that can lead to no solution) + if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; + return 1; // Done } -#endif - } - // Check end-to-end finished - if (!endsfree) { - end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); + return 0; // Not done } + // Extend (dispatcher) + const bool end_reached = wavefront_extend_endsfree_dispatcher_threads(wf_aligner,mwavefront,score); if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; + wf_aligner->align_status.status = WF_STATUS_END_REACHED; + wf_aligner->align_status.score = score; return 1; // Done } // Cut-off wavefront heuristically if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - const bool alignment_dropped = 
wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - if (alignment_dropped) { - wf_aligner->align_status.status = WF_STATUS_HEURISTICALY_DROPPED; + if (wavefront_heuristic_cufoff(wf_aligner,score,score_mod)) { + wf_aligner->align_status.status = WF_STATUS_END_UNREACHABLE; + wf_aligner->align_status.score = score; return 1; // Done } } return 0; // Not done } - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend.h b/pywfa/WFA2_lib/wavefront/wavefront_extend.h index 9240101..d79d2ff 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_extend.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend.h @@ -26,7 +26,7 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + * DESCRIPTION: WFA module for the "extension" of exact matches */ #ifndef WAVEFRONT_EXTEND_H_ @@ -35,19 +35,18 @@ #include "wavefront_aligner.h" /* - * Wavefront exact "extension" + * Wavefront extension */ int wavefront_extend_end2end( wavefront_aligner_t* const wf_aligner, const int score); int wavefront_extend_end2end_max( wavefront_aligner_t* const wf_aligner, - const int score); + const int score, + int* const max_antidiagonal); + int wavefront_extend_endsfree( wavefront_aligner_t* const wf_aligner, const int score); -int wavefront_extend_custom( - wavefront_aligner_t* const wf_aligner, - const int score); #endif /* WAVEFRONT_EXTEND_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.c b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.c new file mode 100644 index 0000000..7600442 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.c @@ -0,0 +1,203 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module for the "extension" of exact matches + */ + +#include <endian.h> + +#include "wavefront_extend_kernels.h" +#include "wavefront_termination.h" + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define wavefront_extend_matches_kernel wavefront_extend_matches_kernel_blockwise +#else +#define wavefront_extend_matches_kernel wavefront_extend_matches_kernel_charwise +#endif + +/* + * Inner-most extend kernels: advance `offset` on diagonal k while pattern[v] == text[h] and return the extended offset (blockwise is selected above only for little-endian targets; charwise otherwise) + */ +FORCE_INLINE wf_offset_t wavefront_extend_matches_kernel_charwise( + wavefront_aligner_t* const wf_aligner, + const int k, + wf_offset_t offset) { + // Fetch pattern/text + char* pattern_ptr = wf_aligner->sequences.pattern + WAVEFRONT_V(k,offset); + char* text_ptr = wf_aligner->sequences.text + WAVEFRONT_H(k,offset); + // Compare one character at a time (no bound check here: the loop only stops at the first mismatch) + while (*pattern_ptr == *text_ptr) { + // Increment offset + offset++; + // Next chars + ++pattern_ptr; + ++text_ptr; + } + // Return extended offset + return offset; +} +FORCE_INLINE wf_offset_t wavefront_extend_matches_kernel_blockwise( + wavefront_aligner_t* const wf_aligner, + const int k, + wf_offset_t offset) { + // Fetch pattern/text blocks + uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->sequences.pattern+WAVEFRONT_V(k,offset)); + uint64_t* text_blocks = (uint64_t*)(wf_aligner->sequences.text+WAVEFRONT_H(k,offset)); + // Compare 64-bits blocks + uint64_t cmp = *pattern_blocks ^ *text_blocks; + while (__builtin_expect(cmp==0,0)) { + // Increment offset (full block) + offset += 8; + // Next blocks + ++pattern_blocks; + ++text_blocks; + // Compare + cmp = *pattern_blocks ^ *text_blocks; + } + // Count equal characters: trailing zero bits of the XOR divided by 8 (ctzll, not ctzl: long is only 32 bits on LLP64 targets, which would ignore the upper half of the block; cmp != 0 here) + const int equal_right_bits = __builtin_ctzll(cmp); + const int equal_chars = DIV_FLOOR(equal_right_bits,8); + offset += equal_chars; + // Return extended offset + return offset; +} +/* + * Wavefront-Extend Inner Kernels + * Wavefront offset extension comparing characters + * Remember: + * - No offset is out
of boundaries !(h>tlen,v>plen) + * - if (h==tlen,v==plen) extension won't increment (sentinels) + */ +FORCE_NO_INLINE void wavefront_extend_matches_packed_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi) { + wf_offset_t* const offsets = mwavefront->offsets; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + const wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offsets[k] = wavefront_extend_matches_kernel(wf_aligner,k,offset); + } +} +FORCE_NO_INLINE wf_offset_t wavefront_extend_matches_packed_end2end_max( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi) { + wf_offset_t* const offsets = mwavefront->offsets; + wf_offset_t max_antidiag = 0; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + const wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offsets[k] = wavefront_extend_matches_kernel(wf_aligner,k,offset); + // Compute max (furthest antidiagonal reached by any extended offset in [lo,hi]; stays 0 if all are null) + const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offsets[k]); + if (max_antidiag < antidiag) max_antidiag = antidiag; + } + return max_antidiag; +} +FORCE_NO_INLINE bool wavefront_extend_matches_packed_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + // Parameters + wf_offset_t* const offsets = mwavefront->offsets; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offset = wavefront_extend_matches_kernel(wf_aligner,k,offset); + offsets[k] = offset; + // Check ends-free reaching boundaries (returns at the first diagonal that terminates; later diagonals are left unextended) + if (wavefront_termination_endsfree(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + /* + * TODO (disabled sketch: run the termination check only once h or v reaches the end of its sequence) + const int h_pos = WAVEFRONT_H(k,offset); + const int v_pos = WAVEFRONT_V(k,offset); + if
(h_pos >= text_length || v_pos >= pattern_length) { // FIXME Use wherever necessary + if (wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + */ + } + // Alignment not finished + return false; +} +/* + * Wavefront-Extend Inner Kernel (Custom match function): compares positions via wavefront_sequences_cmp(); *max_antidiag is reset to 0 and tracks the furthest antidiagonal seen; returns true only when endsfree is set and termination is detected + */ +bool wavefront_extend_matches_custom( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi, + const bool endsfree, + wf_offset_t* const max_antidiag) { + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Extend diagonally each wavefront point + wf_offset_t* const offsets = mwavefront->offsets; + *max_antidiag = 0; + int k; + for (k=lo;k<=hi;++k) { + // Check offset + wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Count equal characters + int v = WAVEFRONT_V(k,offset); + int h = WAVEFRONT_H(k,offset); + while (wavefront_sequences_cmp(seqs,v,h)) { + h++; v++; offset++; + } + // Update offset + offsets[k] = offset; + // Compute max + const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offset); + if (*max_antidiag < antidiag) *max_antidiag = antidiag; + // Check ends-free reaching boundaries + if (endsfree && wavefront_termination_endsfree(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + } + // Alignment not finished + return false; +} diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.h b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.h new file mode 100644 index 0000000..d0a28eb --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels.h @@ -0,0 +1,69 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module for the "extension" of exact matches + */ + +#ifndef WAVEFRONT_EXTEND_KERNELS_H_ +#define WAVEFRONT_EXTEND_KERNELS_H_ + +#include "wavefront_aligner.h" + +/* + * Wavefront-Extend Inner Kernels: extend in place every non-null offset of mwavefront on diagonals lo..hi (inclusive); the _max variant returns the largest antidiagonal reached, the _endsfree variant returns true as soon as ends-free termination is detected + */ +void wavefront_extend_matches_packed_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi); +wf_offset_t wavefront_extend_matches_packed_end2end_max( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi); +bool wavefront_extend_matches_packed_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi); + +/* + * Wavefront-Extend Inner Kernel (Custom match function): same in-place extension over lo..hi; writes the largest antidiagonal reached to *max_antidiag and checks ends-free termination only when endsfree is true + */ +bool wavefront_extend_matches_custom( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi, + const bool endsfree, + wf_offset_t* const max_antidiag); + +#endif /* WAVEFRONT_EXTEND_KERNELS_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.c b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.c new file mode 100644 index 0000000..b31d8d5 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.c @@ -0,0 +1,167 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + */ + +#include "wavefront_extend.h" +#include "wavefront_align.h" +#include "wavefront_compute.h" +#include "wavefront_heuristic.h" +#include "wavefront_extend_kernels.h" +#include "wavefront_extend_kernels_avx.h" + +#if __AVX2__ +#include <immintrin.h> +/* + * Wavefront-Extend Inner Kernel (Scalar): extends one offset on diagonal k by comparing 64-bit blocks of pattern and text, and returns the extended offset + */ +FORCE_INLINE wf_offset_t wavefront_extend_matches_packed_kernel( + wavefront_aligner_t* const wf_aligner, + const int k, + wf_offset_t offset) { + // Fetch pattern/text blocks + uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->sequences.pattern+WAVEFRONT_V(k,offset)); + uint64_t* text_blocks = (uint64_t*)(wf_aligner->sequences.text+WAVEFRONT_H(k,offset)); + // Compare 64-bits blocks + uint64_t cmp = *pattern_blocks ^ *text_blocks; + while (__builtin_expect(cmp==0,0)) { + // Increment offset (full block) + offset += 8; + // Next blocks + ++pattern_blocks; + ++text_blocks; + // Compare + cmp = *pattern_blocks ^ *text_blocks; + } + // Count equal characters: trailing zero bits of the XOR divided by 8 (ctzll, not ctzl: long is only 32 bits on LLP64 targets, which would ignore the upper half of the block; cmp != 0 here) + const int equal_right_bits = __builtin_ctzll(cmp); + const int equal_chars = DIV_FLOOR(equal_right_bits,8); + offset += equal_chars; + // Return extended offset + return offset; +} +/* + * SIMD clz, use a native instruction when available (AVX512 CD or VL + * extensions), or emulate the clz behavior.
+ */ +FORCE_INLINE __m256i avx2_lzcnt_epi32(__m256i v) { +#if __AVX512CD__ && __AVX512VL__ + return _mm256_lzcnt_epi32(v); +#else + // Emulate clz for AVX2: https://stackoverflow.com/a/58827596 + v = _mm256_andnot_si256(_mm256_srli_epi32(v,8),v); // keep 8 MSB + v = _mm256_castps_si256(_mm256_cvtepi32_ps(v)); // convert an integer to float + v = _mm256_srli_epi32(v,23); // shift down the exponent + v = _mm256_subs_epu16(_mm256_set1_epi32(158),v); // undo bias + v = _mm256_min_epi16(v,_mm256_set1_epi32(32)); // clamp at 32 + return v; +#endif +} +/* + * Wavefront-Extend Inner Kernel (SIMD AVX2/AVX512) + */ +FORCE_NO_INLINE void wavefront_extend_matches_packed_end2end_avx2( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi) { + // Parameters + wf_offset_t* const offsets = mwavefront->offsets; + int k_min = lo; + int k_max = hi; + const char* pattern = wf_aligner->sequences.pattern; + const char* text = wf_aligner->sequences.text; + const __m256i vector_null = _mm256_set1_epi32(-1); + const __m256i fours = _mm256_set1_epi32(4); + const __m256i eights = _mm256_set1_epi32(8); + const __m256i vecShuffle = _mm256_set_epi8(28,29,30,31,24,25,26,27, + 20,21,22,23,16,17,18,19, + 12,13,14,15, 8, 9,10,11, + 4 , 5, 6, 7, 0, 1, 2 ,3); + const int elems_per_register = 8; + int num_of_diagonals = k_max - k_min + 1; + int loop_peeling_iters = num_of_diagonals % elems_per_register; + int k; + for (k=k_min;k= 0) { + offsets[curr_k] = wavefront_extend_matches_packed_kernel(wf_aligner,curr_k,offset); + } else { + offsets[curr_k] = WAVEFRONT_OFFSET_NULL; + } + mask &= (0xfffffff0 << tz); + } + } +} + +#endif // AVX2 diff --git a/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.h b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.h new file mode 100644 index 0000000..0e932d4 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_extend_kernels_avx.h @@ -0,0 +1,47 @@ +/* + * The MIT License + * + * Wavefront Alignment 
Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + */ + +#ifndef WAVEFRONT_EXTEND_AVX_H_ +#define WAVEFRONT_EXTEND_AVX_H_ + +#if __AVX2__ + +#include "wavefront_aligner.h" + +void wavefront_extend_matches_packed_end2end_avx2( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi); + +#endif // AVX2 + +#endif /* WAVEFRONT_EXTEND_AVX_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_heuristic.c b/pywfa/WFA2_lib/wavefront/wavefront_heuristic.c index 6e84927..c884560 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_heuristic.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_heuristic.c @@ -29,6 +29,8 @@ * DESCRIPTION: Support functions for wavefront heuristic strategies */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_heuristic.h" #include "wavefront_aligner.h" @@ -39,32 +41,24 @@ void wavefront_heuristic_set_none( wavefront_heuristic_t* const wf_heuristic) { wf_heuristic->strategy = wf_heuristic_none; } -void wavefront_heuristic_set_banded_static( - wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k) { - wf_heuristic->strategy = wf_heuristic_banded_static; - wf_heuristic->min_k = band_min_k; - wf_heuristic->max_k = band_max_k; -} -void wavefront_heuristic_set_banded_adaptive( +void wavefront_heuristic_set_wfadaptive( wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k, + const int min_wavefront_length, + const int max_distance_threshold, const int steps_between_cutoffs) { - wf_heuristic->strategy = wf_heuristic_banded_adaptive; - wf_heuristic->min_k = band_min_k; - wf_heuristic->max_k = band_max_k; + wf_heuristic->strategy |= wf_heuristic_wfadaptive; + wf_heuristic->min_wavefront_length = min_wavefront_length; + wf_heuristic->max_distance_threshold = max_distance_threshold; 
wf_heuristic->steps_between_cutoffs = steps_between_cutoffs; // Internals wf_heuristic->steps_wait = steps_between_cutoffs; } -void wavefront_heuristic_set_wfadaptive( +void wavefront_heuristic_set_wfmash( wavefront_heuristic_t* const wf_heuristic, const int min_wavefront_length, const int max_distance_threshold, const int steps_between_cutoffs) { - wf_heuristic->strategy = wf_heuristic_wfadaptive; + wf_heuristic->strategy |= wf_heuristic_wfmash; wf_heuristic->min_wavefront_length = min_wavefront_length; wf_heuristic->max_distance_threshold = max_distance_threshold; wf_heuristic->steps_between_cutoffs = steps_between_cutoffs; @@ -75,7 +69,7 @@ void wavefront_heuristic_set_xdrop( wavefront_heuristic_t* const wf_heuristic, const int xdrop, const int steps_between_cutoffs) { - wf_heuristic->strategy = wf_heuristic_xdrop; + wf_heuristic->strategy |= wf_heuristic_xdrop; wf_heuristic->xdrop = xdrop; wf_heuristic->steps_between_cutoffs = steps_between_cutoffs; // Internals @@ -88,7 +82,7 @@ void wavefront_heuristic_set_zdrop( wavefront_heuristic_t* const wf_heuristic, const int zdrop, const int steps_between_cutoffs) { - wf_heuristic->strategy = wf_heuristic_zdrop; + wf_heuristic->strategy |= wf_heuristic_zdrop; wf_heuristic->zdrop = zdrop; wf_heuristic->steps_between_cutoffs = steps_between_cutoffs; // Internals @@ -97,6 +91,26 @@ void wavefront_heuristic_set_zdrop( wf_heuristic->max_sw_score_offset = WAVEFRONT_OFFSET_NULL; wf_heuristic->max_sw_score_k = DPMATRIX_DIAGONAL_NULL; } +void wavefront_heuristic_set_banded_static( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k) { + wf_heuristic->strategy |= wf_heuristic_banded_static; + wf_heuristic->min_k = band_min_k; + wf_heuristic->max_k = band_max_k; +} +void wavefront_heuristic_set_banded_adaptive( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k, + const int steps_between_cutoffs) { + wf_heuristic->strategy |= 
wf_heuristic_banded_adaptive; + wf_heuristic->min_k = band_min_k; + wf_heuristic->max_k = band_max_k; + wf_heuristic->steps_between_cutoffs = steps_between_cutoffs; + // Internals + wf_heuristic->steps_wait = steps_between_cutoffs; +} void wavefront_heuristic_clear( wavefront_heuristic_t* const wf_heuristic) { // Internals @@ -108,7 +122,7 @@ void wavefront_heuristic_clear( /* * Utils */ -int wf_compute_distance_end2end( +int wf_distance_end2end( const wf_offset_t offset, const int k, const int pattern_length, @@ -117,7 +131,19 @@ int wf_compute_distance_end2end( const int left_h = text_length - WAVEFRONT_H(k,offset); return (offset >= 0) ? MAX(left_v,left_h) : -WAVEFRONT_OFFSET_NULL; } -int wf_compute_distance_endsfree( +int wf_distance_end2end_weighted( + const wf_offset_t offset, + const int k, + const int pattern_length, + const int text_length, + const int mfactor) { + const int v = WAVEFRONT_V(k,offset); + const int h = WAVEFRONT_H(k,offset); + const int left_v = ((float)(pattern_length - v)/pattern_length * mfactor); + const int left_h = ((float)(text_length - h)/text_length * mfactor); + return (offset >= 0) ? MAX(left_v,left_h) : -WAVEFRONT_OFFSET_NULL; +} +int wf_distance_endsfree( const wf_offset_t offset, const int k, const int pattern_length, @@ -132,26 +158,7 @@ int wf_compute_distance_endsfree( const int dist_down = MAX(left_v,left_h_endsfree); return (offset >= 0) ? MIN(dist_up,dist_down) : -WAVEFRONT_OFFSET_NULL; } -int wf_compute_sw_score( - const int wf_score, - const wf_offset_t offset, - const int k) { - const int v = WAVEFRONT_V(k,offset); - const int h = WAVEFRONT_H(k,offset); - return (offset >= 0) ? 
MIN(v,h) - wf_score : WAVEFRONT_OFFSET_NULL; -} -int wf_compute_sw_score_single_gap( - const int gap_extension, - const wf_offset_t wf1_offset, - const int wf1_k, - const wf_offset_t wf2_offset, - const int wf2_k) { - const int diff_h = WAVEFRONT_H(wf2_k,wf2_offset) - WAVEFRONT_H(wf1_k,wf1_offset); - const int diff_v = WAVEFRONT_V(wf2_k,wf2_offset) - WAVEFRONT_V(wf1_k,wf1_offset); - const int abs_diff = (diff_h >= diff_v) ? diff_h-diff_v : diff_v-diff_h; - return abs_diff * gap_extension; -} -void wavefront_heuristic_cutoff_equate( +void wf_heuristic_equate( wavefront_t* const wavefront_dst, wavefront_t* const wavefront_src) { if (wavefront_dst != NULL) { @@ -164,80 +171,45 @@ void wavefront_heuristic_cutoff_equate( } } /* - * Cut-offs Banded + * Heuristic Cut-off Wavefront-Adaptive */ -void wavefront_cufoff_banded_static( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const wavefront) { - // Parameters - wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; - // Check wavefront limits - if (wavefront->lo < wf_heuristic->min_k) wavefront->lo = wf_heuristic->min_k; - if (wavefront->hi > wf_heuristic->max_k) wavefront->hi = wf_heuristic->max_k; -} -void wavefront_cufoff_banded_adaptive( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const wavefront) { - // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; - // Check steps - if (wf_heuristic->steps_wait > 0) return; - // Check wavefront length - const int lo = wavefront->lo; - const int hi = wavefront->hi; - const int wf_length = hi - lo + 1; - if (wf_length < 4) return; // We cannot do anything here - // Adjust the band +int wf_compute_distance_end2end( + wavefront_t* const wavefront, + const int pattern_length, + const int text_length, + wf_offset_t* const distances) { + // Compute min-distance const wf_offset_t* const offsets = wavefront->offsets; 
- const int max_wf_length = wf_heuristic->max_k - wf_heuristic->min_k + 1; - if (wf_length > max_wf_length) { - // Sample wavefront - const int leeway = (wf_length - max_wf_length) / 2; - const int quarter = wf_length / 4; - const int dist_p0 = wf_compute_distance_end2end( - offsets[lo],lo,pattern_length,text_length); - const int dist_p1 = wf_compute_distance_end2end( - offsets[lo+quarter],lo+quarter,pattern_length,text_length); - const int dist_p2 = wf_compute_distance_end2end( - offsets[lo+2*quarter],lo+2*quarter,pattern_length,text_length); - const int dist_p3 = wf_compute_distance_end2end( - offsets[hi],hi,pattern_length,text_length); - // Heuristically decide where to place the band - int new_lo = lo; - if (dist_p0 > dist_p3) new_lo += leeway; - if (dist_p1 > dist_p2) new_lo += leeway; - // Set wavefront limits - wavefront->lo = new_lo; - if (wavefront->lo < lo) wavefront->lo = lo; - wavefront->hi = new_lo + max_wf_length - 1; - if (wavefront->hi > hi) wavefront->hi = hi; + int k, min_distance = MAX(pattern_length,text_length); + PRAGMA_LOOP_VECTORIZE + for (k=wavefront->lo;k<=wavefront->hi;++k) { + const int distance = wf_distance_end2end( + offsets[k],k,pattern_length,text_length); + distances[k] = distance; + min_distance = MIN(min_distance,distance); } - // Set wait steps (don't repeat this heuristic often) - wf_heuristic->steps_wait = wf_heuristic->steps_between_cutoffs; + return min_distance; } -/* - * Cut-off Wavefront Adaptive - */ -int wavefront_compute_distance_end2end( +int wf_compute_distance_end2end_weighted( wavefront_t* const wavefront, const int pattern_length, const int text_length, wf_offset_t* const distances) { + // Parameters + const int mfactor = ((float)(pattern_length + text_length) / 2); // Mean sequence length // Compute min-distance const wf_offset_t* const offsets = wavefront->offsets; int k, min_distance = MAX(pattern_length,text_length); PRAGMA_LOOP_VECTORIZE for (k=wavefront->lo;k<=wavefront->hi;++k) { - const int distance = 
wf_compute_distance_end2end( - offsets[k],k,pattern_length,text_length); + const int distance = wf_distance_end2end_weighted( + offsets[k],k,pattern_length,text_length,mfactor); distances[k] = distance; min_distance = MIN(min_distance,distance); } return min_distance; } -int wavefront_compute_distance_endsfree( +int wf_compute_distance_endsfree( wavefront_t* const wavefront, const int pattern_length, const int text_length, @@ -249,7 +221,7 @@ int wavefront_compute_distance_endsfree( int k, min_distance = MAX(pattern_length,text_length); PRAGMA_LOOP_VECTORIZE for (k=wavefront->lo;k<=wavefront->hi;++k) { - const int distance = wf_compute_distance_endsfree( + const int distance = wf_distance_endsfree( offsets[k],k,pattern_length,text_length, pattern_end_free,text_end_free); distances[k] = distance; @@ -257,7 +229,7 @@ int wavefront_compute_distance_endsfree( } return min_distance; } -void wavefront_cufoff_wfadaptive_reduce( +void wf_heuristic_wfadaptive_reduce( wavefront_t* const wavefront, const wf_offset_t* const distances, const int min_distance, @@ -282,12 +254,14 @@ void wavefront_cufoff_wfadaptive_reduce( } wavefront->hi = hi_reduced; } -void wavefront_cufoff_wfadaptive( +void wavefront_heuristic_wfadaptive( wavefront_aligner_t* const wf_aligner, - wavefront_t* const wavefront) { + wavefront_t* const wavefront, + const bool wfmash_mode) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const int min_wavefront_length = wf_aligner->heuristic.min_wavefront_length; const int max_distance_threshold = wf_aligner->heuristic.max_distance_threshold; wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; @@ -301,56 +275,58 @@ void wavefront_cufoff_wfadaptive( 
wavefront_components_resize_null__victim(&wf_aligner->wf_components,base_lo-1,base_hi+1); wf_offset_t* const distances = wf_aligner->wf_components.wavefront_victim->offsets; // Compute distance & cut-off -// const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; -// const int text_end_free = wf_aligner->alignment_form.text_end_free; -// if ((wf_aligner->alignment_form.span == alignment_end2end) || -// (pattern_end_free==0 && text_end_free==0)) { - const int min_distance = wavefront_compute_distance_end2end( + int min_distance; + if (wfmash_mode) { + min_distance = wf_compute_distance_end2end_weighted( + wavefront,pattern_length,text_length,distances); + } else { + min_distance = wf_compute_distance_end2end( wavefront,pattern_length,text_length,distances); - // Cut-off wavefront - const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); - wavefront_cufoff_wfadaptive_reduce( - wavefront,distances,min_distance,max_distance_threshold, - alignment_k,alignment_k); -// } -// else { -// const int min_distance = wavefront_compute_distance_endsfree( -// wavefront,pattern_length,text_length, -// pattern_end_free,text_end_free,distances); -// // Cut-off wavefront -// const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); -// wavefront_cufoff_wfadaptive_reduce( -// wavefront,distances,min_distance,max_distance_threshold, -// alignment_k-text_end_free,alignment_k+pattern_end_free); -// } + } + // Cut-off wavefront + const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); + wf_heuristic_wfadaptive_reduce( + wavefront,distances,min_distance,max_distance_threshold, + alignment_k,alignment_k); // Set wait steps (don't repeat this heuristic often) wf_heuristic->steps_wait = wf_heuristic->steps_between_cutoffs; } /* - * Drops + * Heuristic Cut-off Drops */ -void wavefront_compute_sw_scores( +void wf_heuristic_compute_sw_scores( + wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront, - const int score, + 
const int wf_score, wf_offset_t* const sw_scores, wf_offset_t* const max_sw_score, - wf_offset_t* const max_sw_score_k) { + wf_offset_t* const max_k, + wf_offset_t* const max_offset) { + // Parameters + const int wf_match = wf_aligner->penalties.match; + const int swg_match = (wf_match!=0) ? -(wf_aligner->penalties.match) : -1; // Compute min-distance const wf_offset_t* const offsets = wavefront->offsets; - int k, score_max = -score, score_max_k = 0; + int k, cmax_sw_score = INT_MIN, cmax_k = 0, cmax_offset = 0; PRAGMA_LOOP_VECTORIZE for (k=wavefront->lo;k<=wavefront->hi;++k) { - const int sw_score = wf_compute_sw_score(score,offsets[k],k); + const wf_offset_t offset = offsets[k]; + if (offset < 0) continue; + const int v = WAVEFRONT_V(k,offset); + const int h = WAVEFRONT_H(k,offset); + const int sw_score = WF_SCORE_TO_SW_SCORE(swg_match,v,h,wf_score); sw_scores[k] = sw_score; - if (score_max < sw_score) { - score_max = sw_score; - score_max_k = k; + if (cmax_sw_score < sw_score) { + cmax_sw_score = sw_score; + cmax_k = k; + cmax_offset = offset; } } - *max_sw_score = score_max; - *max_sw_score_k = score_max_k; + *max_sw_score = cmax_sw_score; + *max_k = cmax_k; + *max_offset = cmax_offset; } -void wavefront_cufoff_xdrop( +void wavefront_heuristic_xdrop( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront, const int score) { @@ -363,39 +339,65 @@ void wavefront_cufoff_xdrop( // Use victim as temporal buffer wavefront_components_resize_null__victim(&wf_aligner->wf_components,base_lo-1,base_hi+1); wf_offset_t* const sw_scores = wf_aligner->wf_components.wavefront_victim->offsets; - // Compute SW scores (classic scores) - wf_offset_t current_max_sw_score; - wf_offset_t current_max_sw_score_k; - wavefront_compute_sw_scores( - wavefront,score,sw_scores, - ¤t_max_sw_score,¤t_max_sw_score_k); + // Compute SW scores + wf_offset_t cmax_sw_score, cmax_k, dummy; + wf_heuristic_compute_sw_scores( + wf_aligner,wavefront,score,sw_scores, + 
&cmax_sw_score,&cmax_k,&dummy); // Apply X-Drop const int xdrop = wf_heuristic->xdrop; const int max_sw_score = wf_heuristic->max_sw_score; - const int max_sw_score_k = wf_heuristic->max_sw_score_k; - if (max_sw_score_k != DPMATRIX_DIAGONAL_NULL) { + const wf_offset_t* const offsets = wavefront->offsets; + if (wf_heuristic->max_sw_score_k != DPMATRIX_DIAGONAL_NULL) { // Reduce from bottom - int k, lo_reduced = wavefront->lo; + int k; for (k=wavefront->lo;k<=wavefront->hi;++k) { - if ((int)sw_scores[k] >= max_sw_score - xdrop) break; - ++lo_reduced; + if (offsets[k] < 0) continue; + //fprintf(stderr,"[XDROP] (max=%d,current=%d) diff=%d leeway=%d\n", + // max_sw_score,(int)sw_scores[k], + // max_sw_score - (int)sw_scores[k],xdrop); + if (max_sw_score - (int)sw_scores[k] < xdrop) break; } - wavefront->lo = lo_reduced; + wavefront->lo = k; // Reduce from top - int hi_reduced = wavefront->hi; for (k=wavefront->hi;k>=wavefront->lo;--k) { - if ((int)sw_scores[k] >= max_sw_score - xdrop) break; - --hi_reduced; + if (offsets[k] < 0) continue; + //fprintf(stderr,"[XDROP] (max=%d,current=%d) diff=%d leeway=%d\n", + // max_sw_score,(int)sw_scores[k], + // max_sw_score - (int)sw_scores[k],xdrop); + if (max_sw_score - (int)sw_scores[k] < xdrop) break; + } + wavefront->hi = k; + // Update maximum score observed + if (cmax_sw_score > wf_heuristic->max_sw_score) { + wf_heuristic->max_sw_score = cmax_sw_score; + wf_heuristic->max_sw_score_k = cmax_k; } - wavefront->hi = hi_reduced; + } else { + // Update maximum score observed + wf_heuristic->max_sw_score = cmax_sw_score; + wf_heuristic->max_sw_score_k = cmax_k; } - // Update maximum score observed - wf_heuristic->max_sw_score = current_max_sw_score; - wf_heuristic->max_sw_score_k = current_max_sw_score_k; // Set wait steps (don't repeat this heuristic often) wf_heuristic->steps_wait = wf_heuristic->steps_between_cutoffs; } -void wavefront_cufoff_zdrop( +int wf_zdrop_gap_score( + wavefront_aligner_t* const wf_aligner, + const 
wf_offset_t offset_1, + const int k_1, + const wf_offset_t offset_2, + const int k_2) { + // Parameters + const int gap_e = wf_aligner->penalties.internal_gap_e; + // Compute + int diff_h = WAVEFRONT_H(k_2,offset_2) - WAVEFRONT_H(k_1,offset_1); + if (diff_h < 0) diff_h = -diff_h; + int diff_v = WAVEFRONT_V(k_2,offset_2) - WAVEFRONT_V(k_1,offset_1); + if (diff_v < 0) diff_v = -diff_v; + const int gap_length = (diff_h >= diff_v) ? diff_h-diff_v : diff_v-diff_h; + return gap_length * gap_e; +} +bool wavefront_heuristic_zdrop( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront, const int score) { @@ -404,68 +406,105 @@ void wavefront_cufoff_zdrop( const int base_hi = wavefront->hi; const int base_lo = wavefront->lo; // Check steps - if (wf_heuristic->steps_wait > 0) return; + if (wf_heuristic->steps_wait > 0) return false; // Use victim as temporal buffer wavefront_components_resize_null__victim(&wf_aligner->wf_components,base_lo-1,base_hi+1); wf_offset_t* const sw_scores = wf_aligner->wf_components.wavefront_victim->offsets; - // Compute SW scores (classic scores) - wf_offset_t current_max_sw_score; - wf_offset_t current_max_sw_score_k; - wavefront_compute_sw_scores( - wavefront,score,sw_scores, - ¤t_max_sw_score,¤t_max_sw_score_k); - // Apply X-Drop - const wf_offset_t* const offsets = wavefront->offsets; - wavefronts_penalties_t* const penalties = &wf_aligner->penalties; - const int gap_extension = (penalties->gap_extension1 > 0) ? 
penalties->gap_extension1 : 1; + // Compute SW scores + wf_offset_t cmax_sw_score, cmax_k, cmax_offset; + wf_heuristic_compute_sw_scores( + wf_aligner,wavefront,score,sw_scores, + &cmax_sw_score,&cmax_k,&cmax_offset); + // Apply Z-Drop const int zdrop = wf_heuristic->zdrop; const int max_sw_score = wf_heuristic->max_sw_score; - const int max_sw_score_k = wf_heuristic->max_sw_score_k; - const int max_sw_score_offset = wf_heuristic->max_sw_score_offset; - if (max_sw_score_k != DPMATRIX_DIAGONAL_NULL) { - // Reduce from bottom - int k, lo_reduced = wavefront->lo; - for (k=wavefront->lo;k<=wavefront->hi;++k) { - if (offsets[k] < 0) { - ++lo_reduced; - continue; - } - const int single_gap = wf_compute_sw_score_single_gap( - gap_extension,max_sw_score_offset,max_sw_score_k,offsets[k],k); - if ((int)sw_scores[k] > max_sw_score - (zdrop + single_gap)) break; - ++lo_reduced; - } - wavefront->lo = lo_reduced; - // Reduce from top - int hi_reduced = wavefront->hi; - for (k=wavefront->hi;k>=wavefront->lo;--k) { - if (offsets[k] < 0) { - --hi_reduced; - continue; - } - const int single_gap = wf_compute_sw_score_single_gap( - gap_extension,max_sw_score_offset,max_sw_score_k,offsets[k],k); - if ((int)sw_scores[k] > max_sw_score - (zdrop + single_gap)) break; - --hi_reduced; - } - wavefront->hi = hi_reduced; + const int max_k = wf_heuristic->max_sw_score_k; + const int max_offset = wf_heuristic->max_sw_score_offset; + if (max_k != DPMATRIX_DIAGONAL_NULL) { // Update maximum score observed - if (current_max_sw_score > wf_heuristic->max_sw_score) { - wf_heuristic->max_sw_score = current_max_sw_score; - wf_heuristic->max_sw_score_k = current_max_sw_score_k; - wf_heuristic->max_sw_score_offset = max_sw_score_offset; + if (cmax_sw_score > wf_heuristic->max_sw_score) { + wf_heuristic->max_sw_score = cmax_sw_score; + wf_heuristic->max_wf_score = score; + wf_heuristic->max_sw_score_k = cmax_k; + wf_heuristic->max_sw_score_offset = cmax_offset; + } else { + // Test Z-drop + if 
(max_sw_score - (int)cmax_sw_score > zdrop) { + wf_aligner->alignment_end_pos.score = wf_heuristic->max_wf_score; + wf_aligner->alignment_end_pos.k = max_k; + wf_aligner->alignment_end_pos.offset = max_offset; + return true; // Z-dropped + } } } else { // Update maximum score observed - wf_heuristic->max_sw_score = current_max_sw_score; - wf_heuristic->max_sw_score_k = current_max_sw_score_k; - wf_heuristic->max_sw_score_offset = max_sw_score_offset; + wf_heuristic->max_sw_score = cmax_sw_score; + wf_heuristic->max_wf_score = score; + wf_heuristic->max_sw_score_k = cmax_k; + wf_heuristic->max_sw_score_offset = cmax_offset; } // Set wait steps (don't repeat this heuristic often) wf_heuristic->steps_wait = wf_heuristic->steps_between_cutoffs; + // Return NOT Z-dropped + return false; } /* - * Cut-offs dispatcher + * Heuristic Cut-off Banded + */ +void wavefront_heuristic_banded_static( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const wavefront) { + // Parameters + wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; + // Check wavefront limits + if (wavefront->lo < wf_heuristic->min_k) wavefront->lo = wf_heuristic->min_k; + if (wavefront->hi > wf_heuristic->max_k) wavefront->hi = wf_heuristic->max_k; +} +void wavefront_heuristic_banded_adaptive( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const wavefront) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; + // Check steps + if (wf_heuristic->steps_wait > 0) return; + // Check wavefront length + const int lo = wavefront->lo; + const int hi = wavefront->hi; + const int wf_length = hi - lo + 1; + if (wf_length < 4) return; // We cannot do anything here + // Adjust the band + const wf_offset_t* const offsets = wavefront->offsets; + const int max_wf_length = 
wf_heuristic->max_k - wf_heuristic->min_k + 1; + if (wf_length > max_wf_length) { + // Sample wavefront + const int leeway = (wf_length - max_wf_length) / 2; + const int quarter = wf_length / 4; + const int dist_p0 = wf_distance_end2end( + offsets[lo],lo,pattern_length,text_length); + const int dist_p1 = wf_distance_end2end( + offsets[lo+quarter],lo+quarter,pattern_length,text_length); + const int dist_p2 = wf_distance_end2end( + offsets[lo+2*quarter],lo+2*quarter,pattern_length,text_length); + const int dist_p3 = wf_distance_end2end( + offsets[hi],hi,pattern_length,text_length); + // Heuristically decide where to place the band + int new_lo = lo; + if (dist_p0 > dist_p3) new_lo += leeway; + if (dist_p1 > dist_p2) new_lo += leeway; + // Set wavefront limits + wavefront->lo = new_lo; + if (wavefront->lo < lo) wavefront->lo = lo; + wavefront->hi = new_lo + max_wf_length - 1; + if (wavefront->hi > hi) wavefront->hi = hi; + } + // Set wait steps (don't repeat this heuristic often) + wf_heuristic->steps_wait = wf_heuristic->steps_between_cutoffs; +} +/* + * Heuristic Cut-offs dispatcher */ bool wavefront_heuristic_cufoff( wavefront_aligner_t* const wf_aligner, @@ -477,50 +516,54 @@ bool wavefront_heuristic_cufoff( wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; // Fetch m-wavefront wavefront_t* const mwavefront = wf_components->mwavefronts[score_mod]; - if (mwavefront==NULL || mwavefront->lo > mwavefront->hi) return false; // Not dropped - // Cut-off m-wavefront + if (mwavefront == NULL || mwavefront->lo > mwavefront->hi) return false; // Not Dropped + // Decrease wait steps --(wf_heuristic->steps_wait); - if (wf_heuristic->strategy == wf_heuristic_banded_static) { - wavefront_cufoff_banded_static(wf_aligner,mwavefront); - } - if (wf_heuristic->strategy == wf_heuristic_banded_adaptive) { - wavefront_cufoff_banded_adaptive(wf_aligner,mwavefront); + // Save lo/hi base + const int hi_base = mwavefront->hi; + const int lo_base = mwavefront->lo; + // 
Select heuristic (WF-Adaptive) + if (wf_heuristic->strategy & wf_heuristic_wfadaptive) { + wavefront_heuristic_wfadaptive(wf_aligner,mwavefront,false); + } else if (wf_heuristic->strategy & wf_heuristic_wfmash) { + wavefront_heuristic_wfadaptive(wf_aligner,mwavefront,true); } - if (wf_heuristic->strategy == wf_heuristic_wfadaptive) { - wavefront_cufoff_wfadaptive(wf_aligner,mwavefront); + // Select heuristic (Drops) + if (wf_heuristic->strategy & wf_heuristic_xdrop) { + wavefront_heuristic_xdrop(wf_aligner,mwavefront,score); + } else if (wf_heuristic->strategy & wf_heuristic_zdrop) { + if (wavefront_heuristic_zdrop(wf_aligner,mwavefront,score)) return true; // Z-Dropped } - if (wf_heuristic->strategy == wf_heuristic_xdrop) { - wavefront_cufoff_xdrop(wf_aligner,mwavefront,score); - } - if (wf_heuristic->strategy == wf_heuristic_zdrop) { - wavefront_cufoff_zdrop(wf_aligner,mwavefront,score); + // Select heuristic (Banded) + if (wf_heuristic->strategy & wf_heuristic_banded_static) { + wavefront_heuristic_banded_static(wf_aligner,mwavefront); + } else if (wf_heuristic->strategy & wf_heuristic_banded_adaptive) { + wavefront_heuristic_banded_adaptive(wf_aligner,mwavefront); } // Check wavefront length - if (mwavefront->lo > mwavefront->hi) return true; // Dropped alignment + if (lo_base == mwavefront->lo && hi_base == mwavefront->hi) return false; // No wavefronts pruned + if (mwavefront->lo > mwavefront->hi) mwavefront->null = true; + // DEBUG + // const int wf_length_base = hi_base-lo_base+1; + // const int wf_length_reduced = mwavefront->hi-mwavefront->lo+1; + // fprintf(stderr,"[WFA::Heuristic] Heuristic from %d to %d offsets (%2.2f%%)\n", + // wf_length_base,wf_length_reduced,100.0f*(float)wf_length_reduced/(float)wf_length_base); // Save min/max WF initialized mwavefront->wf_elements_init_min = mwavefront->lo; mwavefront->wf_elements_init_max = mwavefront->hi; // Equate other wavefronts - if (distance_metric <= gap_linear) return false; // Not dropped + if 
(distance_metric <= gap_linear) return false; // Not Dropped // Cut-off the other wavefronts (same dimensions as M) wavefront_t* const i1wavefront = wf_components->i1wavefronts[score_mod]; wavefront_t* const d1wavefront = wf_components->d1wavefronts[score_mod]; - wavefront_heuristic_cutoff_equate(i1wavefront,mwavefront); - wavefront_heuristic_cutoff_equate(d1wavefront,mwavefront); - if (distance_metric == gap_affine) return false; // Not dropped + wf_heuristic_equate(i1wavefront,mwavefront); + wf_heuristic_equate(d1wavefront,mwavefront); + if (distance_metric == gap_affine) return false; // Not Dropped wavefront_t* const i2wavefront = wf_components->i2wavefronts[score_mod]; wavefront_t* const d2wavefront = wf_components->d2wavefronts[score_mod]; - wavefront_heuristic_cutoff_equate(i2wavefront,mwavefront); - wavefront_heuristic_cutoff_equate(d2wavefront,mwavefront); - // Return - return false; // Not dropped - // DEBUG - // if (wf_aligner->system.verbose) { - // const int wf_length_base = hi_base-lo_base+1; - // const int wf_length_reduced = mwavefront->hi-mwavefront->lo+1; - // fprintf(stderr,"[WFA::Heuristic] Heuristic from %d to %d offsets (%2.2f%%)\n", - // wf_length_base,wf_length_reduced,100.0f*(float)wf_length_reduced/(float)wf_length_base); - // } + wf_heuristic_equate(i2wavefront,mwavefront); + wf_heuristic_equate(d2wavefront,mwavefront); + return false; // Not Dropped } /* * Display @@ -531,27 +574,41 @@ void wavefront_heuristic_print( // Select heuristic strategy if (wf_heuristic->strategy == wf_heuristic_none) { fprintf(stream,"(none)"); - } else if (wf_heuristic->strategy == wf_heuristic_banded_static) { - fprintf(stream,"(banded-static,%d,%d)", - wf_heuristic->min_k, - wf_heuristic->max_k); - } else if (wf_heuristic->strategy == wf_heuristic_banded_adaptive) { - fprintf(stream,"(banded-adapt,%d,%d,%d)", - wf_heuristic->min_k, - wf_heuristic->max_k, - wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_wfadaptive) { 
- fprintf(stream,"(wf-adapt,%d,%d,%d)", - wf_heuristic->min_wavefront_length, - wf_heuristic->max_distance_threshold, - wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_xdrop) { - fprintf(stream,"(xdrop,%d,%d)", - wf_heuristic->xdrop, - wf_heuristic->steps_between_cutoffs); - } else if (wf_heuristic->strategy == wf_heuristic_zdrop) { - fprintf(stream,"(zdrop,%d,%d)", - wf_heuristic->zdrop, - wf_heuristic->steps_between_cutoffs); + } else { + // WF-Adaptive + if (wf_heuristic->strategy & wf_heuristic_wfadaptive) { + fprintf(stream,"(wfadapt,%d,%d,%d)", + wf_heuristic->min_wavefront_length, + wf_heuristic->max_distance_threshold, + wf_heuristic->steps_between_cutoffs); + } else if (wf_heuristic->strategy & wf_heuristic_wfmash) { + fprintf(stream,"(wfmash,%d,%d,%d)", + wf_heuristic->min_wavefront_length, + wf_heuristic->max_distance_threshold, + wf_heuristic->steps_between_cutoffs); + } + // Drops + if (wf_heuristic->strategy & wf_heuristic_xdrop) { + fprintf(stream,"(xdrop,%d,%d)", + wf_heuristic->xdrop, + wf_heuristic->steps_between_cutoffs); + } + if (wf_heuristic->strategy & wf_heuristic_zdrop) { + fprintf(stream,"(zdrop,%d,%d)", + wf_heuristic->zdrop, + wf_heuristic->steps_between_cutoffs); + } + // Banded + if (wf_heuristic->strategy & wf_heuristic_banded_static) { + fprintf(stream,"(banded-static,%d,%d)", + wf_heuristic->min_k, + wf_heuristic->max_k); + } + if (wf_heuristic->strategy & wf_heuristic_banded_adaptive) { + fprintf(stream,"(banded-adapt,%d,%d,%d)", + wf_heuristic->min_k, + wf_heuristic->max_k, + wf_heuristic->steps_between_cutoffs); + } } } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_heuristic.h b/pywfa/WFA2_lib/wavefront/wavefront_heuristic.h index 6945a10..b230a41 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_heuristic.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_heuristic.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_HEURISTIC_H_ #define WAVEFRONT_HEURISTIC_H_ -#include "utils/commons.h" - // Wavefront ahead 
definition typedef struct _wavefront_aligner_t wavefront_aligner_t; @@ -47,15 +45,16 @@ typedef enum { wf_heuristic_wfadaptive = 0x0000000000000004ul, wf_heuristic_xdrop = 0x0000000000000010ul, wf_heuristic_zdrop = 0x0000000000000020ul, + wf_heuristic_wfmash = 0x0000000000000040ul, } wf_heuristic_strategy; typedef struct { // Heuristic wf_heuristic_strategy strategy; // Heuristic strategy int steps_between_cutoffs; // Score-steps between heuristic cut-offs - // Banded + // Static/Adaptive Banded int min_k; // Banded: Minimum k to consider in band int max_k; // Banded: Maximum k to consider in band - // Adaptive + // WFAdaptive int min_wavefront_length; // Adaptive: Minimum wavefronts length to cut-off int max_distance_threshold; // Adaptive: Maximum distance between offsets allowed // Drops @@ -63,7 +62,8 @@ typedef struct { int zdrop; // Z-drop parameter // Internals int steps_wait; // Score-steps until next cut-off - int max_sw_score; // Maximum score observed (for x/z drops) + int max_sw_score; // Maximum swg-score observed (for x/z drops) + int max_wf_score; // Corresponding wf-score (to max_sw_score) int max_sw_score_offset; // Offset of the maximum score observed int max_sw_score_k; // Diagonal of the maximum score observed } wavefront_heuristic_t; @@ -73,20 +73,18 @@ typedef struct { */ void wavefront_heuristic_set_none( wavefront_heuristic_t* const wf_heuristic); -void wavefront_heuristic_set_banded_static( - wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k); -void wavefront_heuristic_set_banded_adaptive( + +void wavefront_heuristic_set_wfadaptive( wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k, + const int min_wavefront_length, + const int max_distance_threshold, const int steps_between_cutoffs); -void wavefront_heuristic_set_wfadaptive( +void wavefront_heuristic_set_wfmash( wavefront_heuristic_t* const wf_heuristic, const int min_wavefront_length, const int 
max_distance_threshold, const int steps_between_cutoffs); + void wavefront_heuristic_set_xdrop( wavefront_heuristic_t* const wf_heuristic, const int xdrop, @@ -96,6 +94,16 @@ void wavefront_heuristic_set_zdrop( const int ydrop, const int steps_between_cutoffs); +void wavefront_heuristic_set_banded_static( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k); +void wavefront_heuristic_set_banded_adaptive( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k, + const int steps_between_cutoffs); + void wavefront_heuristic_clear( wavefront_heuristic_t* const wf_heuristic); diff --git a/pywfa/WFA2_lib/wavefront/wavefront_offset.h b/pywfa/WFA2_lib/wavefront/wavefront_offset.h index 42d59b7..d1ee5c1 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_offset.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_offset.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_OFFSET_H_ #define WAVEFRONT_OFFSET_H_ -#include "utils/commons.h" - /* * Wavefront Offset */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_pcigar.c b/pywfa/WFA2_lib/wavefront/wavefront_pcigar.c index a66e066..4f6964c 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_pcigar.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_pcigar.c @@ -29,6 +29,8 @@ * DESCRIPTION: Packed CIGAR (Alignment operations in 2-bits) */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_pcigar.h" /* @@ -150,17 +152,16 @@ int pcigar_unpack_extend_custom( */ void pcigar_unpack_linear( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length) { // Parameters + char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + char* const text = sequences->text; + 
const int text_length = sequences->text_length; char* const cigar_buffer_base = cigar_buffer; // Compute pcigar length and shift to the end of the word int pcigar_length = PCIGAR_MAX_LENGTH; @@ -174,10 +175,10 @@ void pcigar_unpack_linear( for (i=0;imode == wf_sequences_lambda) { // Custom extend-match function num_matches = pcigar_unpack_extend_custom( - pattern_length,text_length, - match_funct,match_funct_arguments,v,h,cigar_buffer); + pattern_length,text_length,sequences->match_funct, + sequences->match_funct_arguments,v,h,cigar_buffer); } else { num_matches = pcigar_unpack_extend( pattern,pattern_length,text,text_length,v,h,cigar_buffer); @@ -202,18 +203,17 @@ void pcigar_unpack_linear( } void pcigar_unpack_affine( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length, affine_matrix_type* const current_matrix_type) { // Parameters + char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + char* const text = sequences->text; + const int text_length = sequences->text_length; char* const cigar_buffer_base = cigar_buffer; // Compute pcigar length and shift to the end of the word int pcigar_length = PCIGAR_MAX_LENGTH; @@ -229,10 +229,10 @@ void pcigar_unpack_affine( // Extend exact-matches if (matrix_type == affine_matrix_M) { // Extend only on the M-wavefront int num_matches; - if (match_funct != NULL) { // Custom extend-match function + if (sequences->mode == wf_sequences_lambda) { // Custom extend-match function num_matches = pcigar_unpack_extend_custom( - pattern_length,text_length, - match_funct,match_funct_arguments,v,h,cigar_buffer); + pattern_length,text_length,sequences->match_funct, + sequences->match_funct_arguments,v,h,cigar_buffer); } else 
{ num_matches = pcigar_unpack_extend( pattern,pattern_length,text,text_length,v,h,cigar_buffer); diff --git a/pywfa/WFA2_lib/wavefront/wavefront_pcigar.h b/pywfa/WFA2_lib/wavefront/wavefront_pcigar.h index f194c29..0f2edb7 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_pcigar.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_pcigar.h @@ -32,8 +32,8 @@ #ifndef WAVEFRONT_PACKED_CIGAR_H_ #define WAVEFRONT_PACKED_CIGAR_H_ -#include "utils/commons.h" #include "wavefront_attributes.h" +#include "wavefront_sequences.h" /* * Configuration @@ -91,24 +91,14 @@ int pcigar_unpack( */ void pcigar_unpack_linear( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length); void pcigar_unpack_affine( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, diff --git a/pywfa/WFA2_lib/wavefront/wavefront_penalties.c b/pywfa/WFA2_lib/wavefront/wavefront_penalties.c index d606858..c6e8582 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_penalties.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_penalties.c @@ -29,38 +29,42 @@ * DESCRIPTION: WaveFront penalties handling module */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_penalties.h" /* * Penalties adjustment */ -void wavefronts_penalties_set_indel( - wavefronts_penalties_t* const wavefronts_penalties) { +void wavefront_penalties_set_indel( + wavefront_penalties_t* const wf_penalties) { // Set distance model - wavefronts_penalties->distance_metric = indel; + wf_penalties->distance_metric = indel; // 
Set penalties - wavefronts_penalties->mismatch = -1; - wavefronts_penalties->gap_opening1 = 1; - wavefronts_penalties->gap_extension1 = -1; - wavefronts_penalties->gap_opening2 = -1; - wavefronts_penalties->gap_extension2 = -1; + wf_penalties->match = 0; + wf_penalties->mismatch = -1; + wf_penalties->gap_opening1 = 1; + wf_penalties->gap_extension1 = -1; + wf_penalties->gap_opening2 = -1; + wf_penalties->gap_extension2 = -1; } -void wavefronts_penalties_set_edit( - wavefronts_penalties_t* const wavefronts_penalties) { +void wavefront_penalties_set_edit( + wavefront_penalties_t* const wf_penalties) { // Set distance model - wavefronts_penalties->distance_metric = edit; + wf_penalties->distance_metric = edit; // Set penalties - wavefronts_penalties->mismatch = 1; - wavefronts_penalties->gap_opening1 = 1; - wavefronts_penalties->gap_extension1 = -1; - wavefronts_penalties->gap_opening2 = -1; - wavefronts_penalties->gap_extension2 = -1; + wf_penalties->match = 0; + wf_penalties->mismatch = 1; + wf_penalties->gap_opening1 = 1; + wf_penalties->gap_extension1 = -1; + wf_penalties->gap_opening2 = -1; + wf_penalties->gap_extension2 = -1; } -void wavefronts_penalties_set_linear( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_linear( + wavefront_penalties_t* const wf_penalties, linear_penalties_t* const linear_penalties) { // Set distance model - wavefronts_penalties->distance_metric = gap_linear; + wf_penalties->distance_metric = gap_linear; // Check base penalties if (linear_penalties->match > 0) { fprintf(stderr,"[WFA::Penalties] Match score must be negative or zero (M=%d)\n",linear_penalties->match); @@ -72,24 +76,27 @@ void wavefronts_penalties_set_linear( } // Set penalties (if needed, adjust using Eizenga's formula) if (linear_penalties->match < 0) { - wavefronts_penalties->match = linear_penalties->match; - wavefronts_penalties->mismatch = 2*linear_penalties->mismatch - 2*linear_penalties->match; - 
wavefronts_penalties->gap_opening1 = 2*linear_penalties->indel - linear_penalties->match; + wf_penalties->match = linear_penalties->match; + wf_penalties->mismatch = 2*linear_penalties->mismatch - 2*linear_penalties->match; + wf_penalties->gap_opening1 = 2*linear_penalties->indel - linear_penalties->match; } else { - wavefronts_penalties->match = 0; - wavefronts_penalties->mismatch = linear_penalties->mismatch; - wavefronts_penalties->gap_opening1 = linear_penalties->indel; + wf_penalties->match = 0; + wf_penalties->mismatch = linear_penalties->mismatch; + wf_penalties->gap_opening1 = linear_penalties->indel; } // Set unused - wavefronts_penalties->gap_extension1 = -1; - wavefronts_penalties->gap_opening2 = -1; - wavefronts_penalties->gap_extension2 = -1; + wf_penalties->gap_extension1 = -1; + wf_penalties->gap_opening2 = -1; + wf_penalties->gap_extension2 = -1; + // Internals + wf_penalties->linear_penalties = *linear_penalties; + wf_penalties->internal_gap_e = linear_penalties->indel; } -void wavefronts_penalties_set_affine( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_affine( + wavefront_penalties_t* const wf_penalties, affine_penalties_t* const affine_penalties) { // Set distance model - wavefronts_penalties->distance_metric = gap_affine; + wf_penalties->distance_metric = gap_affine; // Check base penalties if (affine_penalties->match > 0) { fprintf(stderr,"[WFA::Penalties] Match score must be negative or zero (M=%d)\n",affine_penalties->match); @@ -105,33 +112,36 @@ void wavefronts_penalties_set_affine( } // Set penalties (if needed, adjust using Eizenga's formula) if (affine_penalties->match < 0) { - wavefronts_penalties->match = affine_penalties->match; - wavefronts_penalties->mismatch = 2*affine_penalties->mismatch - 2*affine_penalties->match; - wavefronts_penalties->gap_opening1 = 2*affine_penalties->gap_opening; - wavefronts_penalties->gap_extension1 = 2*affine_penalties->gap_extension - affine_penalties->match; + 
wf_penalties->match = affine_penalties->match; + wf_penalties->mismatch = 2*affine_penalties->mismatch - 2*affine_penalties->match; + wf_penalties->gap_opening1 = 2*affine_penalties->gap_opening; + wf_penalties->gap_extension1 = 2*affine_penalties->gap_extension - affine_penalties->match; } else { - wavefronts_penalties->match = 0; - wavefronts_penalties->mismatch = affine_penalties->mismatch; - wavefronts_penalties->gap_opening1 = affine_penalties->gap_opening; - wavefronts_penalties->gap_extension1 = affine_penalties->gap_extension; + wf_penalties->match = 0; + wf_penalties->mismatch = affine_penalties->mismatch; + wf_penalties->gap_opening1 = affine_penalties->gap_opening; + wf_penalties->gap_extension1 = affine_penalties->gap_extension; } // Set unused - wavefronts_penalties->gap_opening2 = -1; - wavefronts_penalties->gap_extension2 = -1; + wf_penalties->gap_opening2 = -1; + wf_penalties->gap_extension2 = -1; + // Internals + wf_penalties->affine_penalties = *affine_penalties; + wf_penalties->internal_gap_e = affine_penalties->gap_extension; } -void wavefronts_penalties_set_affine2p( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_affine2p( + wavefront_penalties_t* const wf_penalties, affine2p_penalties_t* const affine2p_penalties) { // Set distance model - wavefronts_penalties->distance_metric = gap_affine_2p; + wf_penalties->distance_metric = gap_affine_2p; // Check base penalties if (affine2p_penalties->match > 0) { fprintf(stderr,"[WFA::Penalties] Match score must be negative or zero (M=%d)\n",affine2p_penalties->match); exit(1); } else if (affine2p_penalties->mismatch <= 0 || - affine2p_penalties->gap_opening1 <= 0 || + affine2p_penalties->gap_opening1 < 0 || affine2p_penalties->gap_extension1 <= 0 || - affine2p_penalties->gap_opening2 <= 0 || + affine2p_penalties->gap_opening2 < 0 || affine2p_penalties->gap_extension2 <= 0) { fprintf(stderr,"[WFA::Penalties] Penalties (X=%d,O1=%d,E1=%d,O2=%d,E2=%d) must be 
(X>0,O1>=0,E1>0,O1>=0,E1>0)\n", affine2p_penalties->mismatch, @@ -143,57 +153,61 @@ void wavefronts_penalties_set_affine2p( } // Set penalties (if needed, adjust using Eizenga's formula) if (affine2p_penalties->match < 0) { - wavefronts_penalties->match = affine2p_penalties->match; - wavefronts_penalties->mismatch = 2*affine2p_penalties->mismatch - 2*wavefronts_penalties->match; - wavefronts_penalties->gap_opening1 = 2*affine2p_penalties->gap_opening1; - wavefronts_penalties->gap_extension1 = 2*affine2p_penalties->gap_extension1 - affine2p_penalties->match; - wavefronts_penalties->gap_opening2 = 2*affine2p_penalties->gap_opening2; - wavefronts_penalties->gap_extension2 = 2*affine2p_penalties->gap_extension2 - affine2p_penalties->match; + wf_penalties->match = affine2p_penalties->match; + wf_penalties->mismatch = 2*affine2p_penalties->mismatch - 2*wf_penalties->match; + wf_penalties->gap_opening1 = 2*affine2p_penalties->gap_opening1; + wf_penalties->gap_extension1 = 2*affine2p_penalties->gap_extension1 - affine2p_penalties->match; + wf_penalties->gap_opening2 = 2*affine2p_penalties->gap_opening2; + wf_penalties->gap_extension2 = 2*affine2p_penalties->gap_extension2 - affine2p_penalties->match; } else { - wavefronts_penalties->match = 0; - wavefronts_penalties->mismatch = affine2p_penalties->mismatch; - wavefronts_penalties->gap_opening1 = affine2p_penalties->gap_opening1; - wavefronts_penalties->gap_extension1 = affine2p_penalties->gap_extension1; - wavefronts_penalties->gap_opening2 = affine2p_penalties->gap_opening2; - wavefronts_penalties->gap_extension2 = affine2p_penalties->gap_extension2; + wf_penalties->match = 0; + wf_penalties->mismatch = affine2p_penalties->mismatch; + wf_penalties->gap_opening1 = affine2p_penalties->gap_opening1; + wf_penalties->gap_extension1 = affine2p_penalties->gap_extension1; + wf_penalties->gap_opening2 = affine2p_penalties->gap_opening2; + wf_penalties->gap_extension2 = affine2p_penalties->gap_extension2; } + // Internals + 
wf_penalties->affine2p_penalties = *affine2p_penalties; + wf_penalties->internal_gap_e = affine2p_penalties->gap_extension1; } /* * Display */ -void wavefronts_penalties_print( +void wavefront_penalties_print( FILE* const stream, - wavefronts_penalties_t* const wavefronts_penalties) { + wavefront_penalties_t* const wf_penalties) { // Select penalties mode - switch (wavefronts_penalties->distance_metric) { + switch (wf_penalties->distance_metric) { case indel: - fprintf(stream,"(Indel)"); + fprintf(stream,"(Indel,0,inf,1)"); break; case edit: - fprintf(stream,"(Edit)"); + fprintf(stream,"(Edit,0,1,1)"); break; case gap_linear: - fprintf(stream,"(GapLinear,%d,%d)", - wavefronts_penalties->mismatch, - wavefronts_penalties->gap_opening1); + fprintf(stream,"(GapLinear,%d,%d,%d)", + wf_penalties->linear_penalties.match, + wf_penalties->linear_penalties.mismatch, + wf_penalties->linear_penalties.indel); break; case gap_affine: - fprintf(stream,"(GapAffine,%d,%d,%d)", - wavefronts_penalties->mismatch, - wavefronts_penalties->gap_opening1, - wavefronts_penalties->gap_extension1); + fprintf(stream,"(GapAffine,%d,%d,%d,%d)", + wf_penalties->affine_penalties.match, + wf_penalties->affine_penalties.mismatch, + wf_penalties->affine_penalties.gap_opening, + wf_penalties->affine_penalties.gap_extension); break; case gap_affine_2p: - fprintf(stream,"(GapAffine2p%d,%d,%d,%d,%d)", - wavefronts_penalties->mismatch, - wavefronts_penalties->gap_opening1, - wavefronts_penalties->gap_extension1, - wavefronts_penalties->gap_opening2, - wavefronts_penalties->gap_extension2); + fprintf(stream,"(GapAffine2p,%d,%d,%d,%d,%d,%d)", + wf_penalties->affine2p_penalties.match, + wf_penalties->affine2p_penalties.mismatch, + wf_penalties->affine2p_penalties.gap_opening1, + wf_penalties->affine2p_penalties.gap_extension1, + wf_penalties->affine2p_penalties.gap_opening2, + wf_penalties->affine2p_penalties.gap_extension2); break; default: break; } } - - diff --git 
a/pywfa/WFA2_lib/wavefront/wavefront_penalties.h b/pywfa/WFA2_lib/wavefront/wavefront_penalties.h index bd6a1a5..127a898 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_penalties.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_penalties.h @@ -51,62 +51,49 @@ typedef enum { * Wavefront Penalties */ typedef struct { - distance_metric_t distance_metric; // Alignment metric/distance used - int match; // (M <= 0) (Internal variable change to M=0 for WFA) + // Alignment metric/distance used + distance_metric_t distance_metric; + // Penalty values + int match; // (M <= 0) int mismatch; // (X > 0) int gap_opening1; // (O1 >= 0) int gap_extension1; // (E1 > 0) int gap_opening2; // (O2 >= 0) int gap_extension2; // (E2 > 0) -} wavefronts_penalties_t; + // Internals + linear_penalties_t linear_penalties; // Original gap-linear penalties + affine_penalties_t affine_penalties; // Original gap-affine penalties + affine2p_penalties_t affine2p_penalties; // Original gap-affine2p penalties + int internal_gap_e; // Original gap-extension value (used for z-drop) +} wavefront_penalties_t; /* * Compute SW-score equivalent (thanks to Eizenga's formula) */ -#define WF_PENALTIES_GET_SW_SCORE(swg_match_score,plen,tlen,wf_score) \ - (swg_match_score*(plen+tlen))/2 - wf_score +#define WF_SCORE_TO_SW_SCORE(swg_match,plen,tlen,wf_score) ((swg_match*(plen+tlen) - wf_score)/2) /* * Penalties adjustment */ -void wavefronts_penalties_set_indel( - wavefronts_penalties_t* const wavefronts_penalties); -void wavefronts_penalties_set_edit( - wavefronts_penalties_t* const wavefronts_penalties); -void wavefronts_penalties_set_linear( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_indel( + wavefront_penalties_t* const wf_penalties); +void wavefront_penalties_set_edit( + wavefront_penalties_t* const wf_penalties); +void wavefront_penalties_set_linear( + wavefront_penalties_t* const wf_penalties, linear_penalties_t* const linear_penalties); -void 
wavefronts_penalties_set_affine( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_affine( + wavefront_penalties_t* const wf_penalties, affine_penalties_t* const affine_penalties); -void wavefronts_penalties_set_affine2p( - wavefronts_penalties_t* const wavefronts_penalties, +void wavefront_penalties_set_affine2p( + wavefront_penalties_t* const wf_penalties, affine2p_penalties_t* const affine2p_penalties); -/* - * Score conversion - */ -int wavefronts_penalties_get_score_indel( - wavefronts_penalties_t* const wavefronts_penalties, - const int score); -int wavefronts_penalties_get_score_edit( - wavefronts_penalties_t* const wavefronts_penalties, - const int score); -int wavefronts_penalties_get_score_linear( - wavefronts_penalties_t* const wavefronts_penalties, - const int score); -int wavefronts_penalties_get_score_affine( - wavefronts_penalties_t* const wavefronts_penalties, - const int score); -int wavefronts_penalties_get_score_affine2p( - wavefronts_penalties_t* const wavefronts_penalties, - const int score); - /* * Display */ -void wavefronts_penalties_print( +void wavefront_penalties_print( FILE* const stream, - wavefronts_penalties_t* const wavefronts_penalties); + wavefront_penalties_t* const wf_penalties); #endif /* WAVEFRONT_WAVEFRONT_PENALTIES_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_plot.c b/pywfa/WFA2_lib/wavefront/wavefront_plot.c index 39905ee..9905cd2 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_plot.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_plot.c @@ -26,88 +26,158 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for plot + * DESCRIPTION: Wavefront alignment module for plot */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_plot.h" #include "wavefront_aligner.h" /* - * Setup + * Heatmaps */ -void wavefront_plot_allocate( +void wavefront_plot_heatmaps_allocate( wavefront_plot_t* const 
wf_plot, - const distance_metric_t distance_metric, const int pattern_length, - const int text_length, - wavefront_plot_params_t* const plot_params) { + const int text_length) { + wavefront_plot_attr_t* const attributes = &wf_plot->attributes; // Compute dimensions - const int min_v = (plot_params->min_v == -1) ? 0 : plot_params->min_v; - const int max_v = (plot_params->max_v == -1) ? pattern_length-1 : plot_params->max_v; - const int min_h = (plot_params->min_h == -1) ? 0 : plot_params->min_h; - const int max_h = (plot_params->max_h == -1) ? text_length-1 : plot_params->max_h; - // Wavefront Components - wf_plot->m_heatmap = heatmap_new(heatmap_min, - min_v,max_v,min_h,max_h,plot_params->resolution_points); - if (distance_metric == gap_affine) { - wf_plot->i1_heatmap = heatmap_new(heatmap_min, - min_v,max_v,min_h,max_h,plot_params->resolution_points); - wf_plot->d1_heatmap = heatmap_new(heatmap_min, - min_v,max_v,min_h,max_h,plot_params->resolution_points); - } else { - wf_plot->i1_heatmap = NULL; - wf_plot->d1_heatmap = NULL; - } - if (distance_metric == gap_affine_2p) { - wf_plot->i2_heatmap = heatmap_new(heatmap_min, - min_v,max_v,min_h,max_h,plot_params->resolution_points); - wf_plot->d2_heatmap = heatmap_new(heatmap_min, - min_v,max_v,min_h,max_h,plot_params->resolution_points); - } else { - wf_plot->i2_heatmap = NULL; - wf_plot->d2_heatmap = NULL; - } + const int resolution_points = attributes->resolution_points; + const int min_v = (wf_plot->min_v == -1) ? 0 : wf_plot->min_v; + const int max_v = (wf_plot->max_v == -1) ? pattern_length-1 : wf_plot->max_v; + const int min_h = (wf_plot->min_h == -1) ? 0 : wf_plot->min_h; + const int max_h = (wf_plot->max_h == -1) ? 
text_length-1 : wf_plot->max_h; // Behavior wf_plot->behavior_heatmap = heatmap_new(heatmap_value, - min_v,max_v,min_h,max_h,plot_params->resolution_points); + min_v,max_v,min_h,max_h,resolution_points); + // Wavefront Components + wf_plot->m_heatmap = heatmap_new(heatmap_min, + min_v,max_v,min_h,max_h,resolution_points); + wf_plot->i1_heatmap = NULL; + wf_plot->d1_heatmap = NULL; + wf_plot->i2_heatmap = NULL; + wf_plot->d2_heatmap = NULL; + if (wf_plot->distance_metric < gap_affine) return; + // Gap-affine + wf_plot->i1_heatmap = heatmap_new(heatmap_min, + min_v,max_v,min_h,max_h,resolution_points); + wf_plot->d1_heatmap = heatmap_new(heatmap_min, + min_v,max_v,min_h,max_h,resolution_points); + if (wf_plot->distance_metric == gap_affine) return; + // Gap-affine-2p + wf_plot->i2_heatmap = heatmap_new(heatmap_min, + min_v,max_v,min_h,max_h,resolution_points); + wf_plot->d2_heatmap = heatmap_new(heatmap_min, + min_v,max_v,min_h,max_h,resolution_points); } -void wavefront_plot_free( +void wavefront_plot_heatmaps_free( wavefront_plot_t* const wf_plot) { + heatmap_delete(wf_plot->behavior_heatmap); heatmap_delete(wf_plot->m_heatmap); if (wf_plot->i1_heatmap) heatmap_delete(wf_plot->i1_heatmap); if (wf_plot->d1_heatmap) heatmap_delete(wf_plot->d1_heatmap); if (wf_plot->i2_heatmap) heatmap_delete(wf_plot->i2_heatmap); if (wf_plot->d2_heatmap) heatmap_delete(wf_plot->d2_heatmap); - heatmap_delete(wf_plot->behavior_heatmap); +} +/* + * Setup + */ +wavefront_plot_t* wavefront_plot_new( + const distance_metric_t distance_metric, + const int pattern_length, + const int text_length, + wavefront_plot_attr_t* const attributes) { + // Handler + wavefront_plot_t* const wf_plot = (wavefront_plot_t*)malloc(sizeof(wavefront_plot_t)); + // Parameters + wf_plot->attributes = *attributes; + wf_plot->distance_metric = distance_metric; + wf_plot->min_v = -1; + wf_plot->max_v = -1; + wf_plot->min_h = -1; + wf_plot->max_h = -1; + // Allocate and configure + 
wavefront_plot_heatmaps_allocate(wf_plot,pattern_length,text_length); + // Return + return wf_plot; +} +void wavefront_plot_resize( + wavefront_plot_t* const wf_plot, + const int pattern_length, + const int text_length) { + // Free heatmaps + wavefront_plot_heatmaps_free(wf_plot); + // Allocate new heatmaps + wavefront_plot_heatmaps_allocate(wf_plot,pattern_length,text_length); +} +void wavefront_plot_delete( + wavefront_plot_t* const wf_plot) { + // Heatmaps + wavefront_plot_heatmaps_free(wf_plot); + // Handler + free(wf_plot); } /* * Accessors */ void wavefront_plot_component( + wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, const int score, heatmap_t* const wf_heatmap, - heatmap_t* const extend_heatmap) { - if (wavefront != NULL) { - int k; - for (k=wavefront->lo;k<=wavefront->hi;++k) { - const wf_offset_t offset = wavefront->offsets[k]; - if (offset >= 0) { - // Compute coordinates - int v = WAVEFRONT_V(k,offset); - int h = WAVEFRONT_H(k,offset); - if (v>=pattern_length || h>=text_length) continue; - heatmap_set(wf_heatmap,v,h,score); - // Simulate extension - if (extend_heatmap != NULL) { - while (vsequences; + const int pattern_begin = sequences->pattern_begin; + const int pattern_length = sequences->pattern_length; + const int text_begin = sequences->text_begin; + const int text_length = sequences->text_length; + const char* const pattern = sequences->pattern; + const char* const text = sequences->text; + const bool reverse = (wf_aligner->align_mode == wf_align_biwfa_breakpoint_reverse); + // Traverse all offsets + int k; + for (k=wavefront->lo;k<=wavefront->hi;++k) { + const wf_offset_t offset = wavefront->offsets[k]; + if (offset < 0) continue; + // Compute local coordinates + int v_local = WAVEFRONT_V(k,offset); + int h_local = WAVEFRONT_H(k,offset); + if (v_local < 0 || v_local >= pattern_length) continue; + if (h_local < 0 || 
h_local >= text_length) continue; + // Compute global coordinates + int v_global, h_global; + if (reverse) { + v_global = pattern_begin + (pattern_length - 1 - v_local); + h_global = text_begin + (text_length - 1 - h_local); + } else { + v_global = pattern_begin + v_local; + h_global = text_begin + h_local; + } + // Plot + if (reverse) { + if (h_local>0 && v_local>0) heatmap_set(wf_heatmap,v_global+1,h_global+1,score); + } else { + if (h_local>0 && v_local>0) heatmap_set(wf_heatmap,v_global-1,h_global-1,score); + } + // Simulate extension + if (extend) { + while (v_local < pattern_length && + h_local < text_length && + pattern[v_local] == text[h_local]) { + if (reverse) { + v_global--; h_global--; + } else { + v_global++; h_global++; + } + v_local++; h_local++; + if (reverse) { + heatmap_set(wf_heatmap,v_global+1,h_global+1,score); + } else { + heatmap_set(wf_heatmap,v_global-1,h_global-1,score); } } } @@ -115,63 +185,40 @@ void wavefront_plot_component( } void wavefront_plot( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const char* const text, - const int score) { + const int score, + const int align_level) { + // Check plotting enabled wrt align-level + if (wf_aligner->align_mode == wf_align_biwfa_breakpoint_forward || + wf_aligner->align_mode == wf_align_biwfa_breakpoint_reverse) { + if (align_level != wf_aligner->plot->attributes.align_level) return; + } + if (wf_aligner->align_mode == wf_align_biwfa_subsidiary && + wf_aligner->plot->attributes.align_level != -1) return; // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score; + const int score_mod = (wf_components->memory_modular) ? 
score%wf_components->max_score_scope : score; // Plot wavefront components - wavefront_plot_component( - wf_components->mwavefronts[s], - pattern,pattern_length,text,text_length, - score,wf_aligner->wf_plot.m_heatmap, - wf_aligner->wf_plot.behavior_heatmap); - if (distance_metric == gap_affine) { - wavefront_plot_component( - wf_components->i1wavefronts[s], - pattern,pattern_length,text,text_length, - score,wf_aligner->wf_plot.i1_heatmap,NULL); - wavefront_plot_component( - wf_components->d1wavefronts[s], - pattern,pattern_length,text,text_length, - score,wf_aligner->wf_plot.d1_heatmap,NULL); - } - if (distance_metric == gap_affine_2p) { - wavefront_plot_component( - wf_components->i2wavefronts[s], - pattern,pattern_length,text,text_length, - score,wf_aligner->wf_plot.i2_heatmap,NULL); - wavefront_plot_component( - wf_components->d2wavefronts[s], - pattern,pattern_length,text,text_length, - score,wf_aligner->wf_plot.d2_heatmap,NULL); - } + wavefront_plot_component(wf_aligner, + wf_components->mwavefronts[score_mod], + score,wf_aligner->plot->m_heatmap,true); + if (distance_metric < gap_affine) return; + // Gap-affine + wavefront_plot_component(wf_aligner, + wf_components->i1wavefronts[score_mod], + score,wf_aligner->plot->i1_heatmap,false); + wavefront_plot_component(wf_aligner, + wf_components->d1wavefronts[score_mod], + score,wf_aligner->plot->d1_heatmap,false); + if (distance_metric == gap_affine) return; + // Gap-affine-2p + wavefront_plot_component(wf_aligner, + wf_components->i2wavefronts[score_mod], + score,wf_aligner->plot->i2_heatmap,false); + wavefront_plot_component(wf_aligner, + wf_components->d2wavefronts[score_mod], + score,wf_aligner->plot->d2_heatmap,false); } -//void wavefront_plot_cutoff( -// wavefront_aligner_t* const wf_aligner, -// const int score, -// const int lo_base, -// const int lo_reduced, -// const int hi_base, -// const int hi_reduced) { -// wavefront_components_t* const wf_components = &wf_aligner->wf_components; -// const int s = 
(wf_components->memory_modular) ? score%wf_components->max_score_scope : score; -// wavefront_t* const wavefront = wf_components->mwavefronts[s]; -// heatmap_t* const heatmap = wf_aligner->wf_plot.behavior_heatmap; -// int k; -// for (k=lo_base;koffsets[k]; -// if (offset >= 0) heatmap_set(heatmap,WAVEFRONT_V(k,offset),WAVEFRONT_H(k,offset),20); -// } -// for (k=hi_reduced+1;k<=hi_base;++k) { -// const wf_offset_t offset = wavefront->offsets[k]; -// if (offset >= 0) heatmap_set(heatmap,WAVEFRONT_V(k,offset),WAVEFRONT_H(k,offset),20); -// } -//} /* * Display */ @@ -181,19 +228,19 @@ void wavefront_plot_print_cigar( const char target_operation) { int i, h=0, v=0, count=0; for (i=cigar->begin_offset;iend_offset;++i) { - // Print point - const char operation = cigar->operations[i]; - if (operation == target_operation) { - if (count++ > 0) fprintf(stream,";"); - fprintf(stream,"%d,%d",h,v); - } // Check operation + const char operation = cigar->operations[i]; switch (operation) { case 'M': case 'X': ++h; ++v; break; case 'I': ++h; break; case 'D': ++v; break; default: break; } + // Print point + if (operation == target_operation && h>0 && v>0) { + if (count++ > 0) fprintf(stream,";"); + fprintf(stream,"%d,%d",h-1,v-1); + } } } void wavefront_plot_print( @@ -201,23 +248,43 @@ void wavefront_plot_print( wavefront_aligner_t* const wf_aligner) { // Parameters const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - wavefront_plot_t* const wf_plot = &wf_aligner->wf_plot; + wavefront_plot_t* const wf_plot = wf_aligner->plot; + wavefront_sequences_t* sequences = NULL; + if (wf_aligner->bialigner == NULL) { + sequences = &wf_aligner->sequences; + } else { + sequences = &wf_aligner->bialigner->wf_forward->sequences; + wavefront_sequences_set_bounds(sequences, + 0,sequences->pattern_buffer_length, + 0,sequences->text_buffer_length); + } + const int pattern_length = sequences->pattern_buffer_length; + const int text_length = sequences->text_buffer_length; 
+ // Check plot + if (wf_aligner->plot == NULL) { + fprintf(stream,"# WFA-plot not enabled\n"); + return; + } // Metadata - fprintf(stream,"# PatternLength %d\n",wf_aligner->pattern_length); - fprintf(stream,"# TextLength %d\n",wf_aligner->text_length); + if (sequences->mode == wf_sequences_lambda) { + fprintf(stream,"# PatternLength %d\n",pattern_length); + fprintf(stream,"# TextLength %d\n",text_length); + fprintf(stream,"# Pattern -\n"); + fprintf(stream,"# Text -\n"); + } else { + fprintf(stream,"# PatternLength %d\n",pattern_length); + fprintf(stream,"# Pattern %.*s\n",pattern_length,sequences->pattern); + fprintf(stream,"# TextLength %d\n",text_length); + fprintf(stream,"# Text %.*s\n",text_length,sequences->text); + } fprintf(stream,"# Penalties "); - wavefronts_penalties_print(stream,&wf_aligner->penalties); + wavefront_penalties_print(stream,&wf_aligner->penalties); fprintf(stream,"\n"); // Alignment mode - fprintf(stream,"# WFAMode ("); - fprintf(stream,"%s",(wf_aligner->alignment_scope==compute_score)?"S":"A"); - fprintf(stream,"%c",(wf_aligner->wf_components.bt_piggyback)?'L':'F'); - fprintf(stream,"%c",(wf_aligner->alignment_form.span==alignment_end2end)?'G':'S'); + fprintf(stream,"# WFAMode "); wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; - if (wf_heuristic->strategy != wf_heuristic_none) { - wavefront_heuristic_print(stream,wf_heuristic); - } - fprintf(stream,")\n"); + wavefront_heuristic_print(stream,wf_heuristic); + fprintf(stream,"\n"); // Wavefront components fprintf(stream,"# Heatmap M\n"); heatmap_print(stream,wf_plot->m_heatmap); if (distance_metric == gap_affine) { @@ -233,16 +300,16 @@ void wavefront_plot_print( // CIGAR if (wf_aligner->alignment_scope == compute_alignment) { fprintf(stream,"# List CIGAR-M "); - wavefront_plot_print_cigar(stream,&wf_aligner->cigar,'M'); + wavefront_plot_print_cigar(stream,wf_aligner->cigar,'M'); fprintf(stream,"\n"); fprintf(stream,"# List CIGAR-X "); - 
wavefront_plot_print_cigar(stream,&wf_aligner->cigar,'X'); + wavefront_plot_print_cigar(stream,wf_aligner->cigar,'X'); fprintf(stream,"\n"); fprintf(stream,"# List CIGAR-I "); - wavefront_plot_print_cigar(stream,&wf_aligner->cigar,'I'); + wavefront_plot_print_cigar(stream,wf_aligner->cigar,'I'); fprintf(stream,"\n"); fprintf(stream,"# List CIGAR-D "); - wavefront_plot_print_cigar(stream,&wf_aligner->cigar,'D'); + wavefront_plot_print_cigar(stream,wf_aligner->cigar,'D'); fprintf(stream,"\n"); } } diff --git a/pywfa/WFA2_lib/wavefront/wavefront_plot.h b/pywfa/WFA2_lib/wavefront/wavefront_plot.h index c824e08..79349c8 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_plot.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_plot.h @@ -26,14 +26,14 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for plot + * DESCRIPTION: Wavefront alignment module for plot */ #ifndef WAVEFRONT_PLOT_H_ #define WAVEFRONT_PLOT_H_ -#include "utils/commons.h" #include "utils/heatmap.h" +#include "alignment/score_matrix.h" #include "wavefront/wavefront_penalties.h" // Wavefront ahead definition @@ -43,49 +43,52 @@ typedef struct _wavefront_aligner_t wavefront_aligner_t; * Wavefront Display */ typedef struct { - // Display enabled - bool plot_enabled; - // Resolution and range - int resolution_points; + bool enabled; // Is plotting enabled + int resolution_points; // Total resolution points + int align_level; // Level of recursion to plot (-1 == final) +} wavefront_plot_attr_t; +typedef struct { + // Configuration + wavefront_plot_attr_t attributes; + distance_metric_t distance_metric; int min_v; int max_v; int min_h; int max_h; -} wavefront_plot_params_t; -typedef struct { - // Wavefront components + // Wavefront Heatmaps heatmap_t* m_heatmap; heatmap_t* i1_heatmap; heatmap_t* d1_heatmap; heatmap_t* i2_heatmap; heatmap_t* d2_heatmap; - // Alignment behavior heatmap_t* behavior_heatmap; } wavefront_plot_t; /* * Setup */ -void 
wavefront_plot_allocate( - wavefront_plot_t* const wf_plot, +wavefront_plot_t* wavefront_plot_new( const distance_metric_t distance_metric, const int pattern_length, const int text_length, - wavefront_plot_params_t* const plot_params); -void wavefront_plot_free( + wavefront_plot_attr_t* const attributes); +void wavefront_plot_resize( + wavefront_plot_t* const wf_plot, + const int pattern_length, + const int text_length); +void wavefront_plot_delete( wavefront_plot_t* const wf_plot); /* - * Accessors + * Plot record state */ void wavefront_plot( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const char* const text, - const int score); + const int score, + const int align_level); /* - * Display + * Display/Dump */ void wavefront_plot_print( FILE* const stream, diff --git a/pywfa/WFA2_lib/wavefront/wavefront_sequences.c b/pywfa/WFA2_lib/wavefront/wavefront_sequences.c new file mode 100644 index 0000000..e4963cd --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_sequences.c @@ -0,0 +1,310 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to encapsulate the input sequences + */ + +#include "wavefront_sequences.h" + +/* + * Configuration + */ +#define WF_SEQUENCES_PADDING 64 +#define WF_SEQUENCES_PATTERN_EOS '!' +#define WF_SEQUENCES_TEXT_EOS '?' + +/* + * Setup + */ +void wavefront_sequences_allocate( + wavefront_sequences_t* const wf_sequences) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = false; + // Source sequences + wf_sequences->seq_buffer = NULL; + wf_sequences->seq_buffer_allocated = 0; + // Current state + wf_sequences->pattern = NULL; + wf_sequences->text = NULL; +} +void wavefront_sequences_free( + wavefront_sequences_t* const wf_sequences) { + // Free internal buffers + if (wf_sequences->seq_buffer != NULL) free(wf_sequences->seq_buffer); +} +/* + * Init Sequences + */ +void wavefront_sequences_init_allocate( + wavefront_sequences_t* const wf_sequences, + const int pattern_length, + const int text_length) { + // Compute dimensions + const int buffer_size = pattern_length + text_length + 3*WF_SEQUENCES_PADDING; + // Check internal buffer allocated + if (wf_sequences->seq_buffer_allocated < buffer_size) { + // Free + if (wf_sequences->seq_buffer != NULL) free(wf_sequences->seq_buffer); + // Allocate + const int proposed_size = buffer_size + buffer_size/2; + wf_sequences->seq_buffer = calloc(proposed_size,1); + wf_sequences->seq_buffer_allocated = proposed_size; + 
} + // Assign memory + wf_sequences->pattern_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING; + wf_sequences->text_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING + pattern_length + WF_SEQUENCES_PADDING; +} +void wavefront_sequences_init_copy( + char* const buffer_dst, + const char* const sequence, + const int sequence_length, + const int padding_length, + const char padding_value, + const bool reverse) { + // Copy sequence + if (reverse) { + int i; + for (i=0;i>2 & 3)]; + const char letter2 = dna_packed2bits_decode[(word>>4 & 3)]; + const char letter3 = dna_packed2bits_decode[(word>>6 & 3)]; + if (reverse) { + buffer_dst[buffer_pos ] = letter0; + buffer_dst[buffer_pos-1] = letter1; + buffer_dst[buffer_pos-2] = letter2; + buffer_dst[buffer_pos-3] = letter3; + buffer_pos -= 4; + } else { + buffer_dst[buffer_pos ] = letter0; + buffer_dst[buffer_pos+1] = letter1; + buffer_dst[buffer_pos+2] = letter2; + buffer_dst[buffer_pos+3] = letter3; + buffer_pos += 4; + } + } + // Add end padding + buffer_dst[sequence_length] = padding_value; +} +void wavefront_sequences_init_ascii( + wavefront_sequences_t* const wf_sequences, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = reverse; + // Allocate buffers + wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length); + // Copy internal sequences + wavefront_sequences_init_copy(wf_sequences->pattern_buffer, + pattern,pattern_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse); + wf_sequences->pattern_buffer_length = pattern_length; + wavefront_sequences_init_copy(wf_sequences->text_buffer, + text,text_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse); + wf_sequences->text_buffer_length = text_length; + // Set pattern + wf_sequences->pattern = wf_sequences->pattern_buffer; + wf_sequences->pattern_begin = 0; + 
wf_sequences->pattern_length = pattern_length; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length]; + // Set text + wf_sequences->text = wf_sequences->text_buffer; + wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + wf_sequences->text_eos = wf_sequences->text[text_length]; +} +void wavefront_sequences_init_lambda( + wavefront_sequences_t* const wf_sequences, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_lambda; + wf_sequences->reverse = reverse; + // Set sequences' length + wf_sequences->pattern = NULL; + wf_sequences->text = NULL; + wf_sequences->pattern_begin = 0; + wf_sequences->pattern_length = pattern_length; + wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + // Internals + wf_sequences->match_funct = match_funct; + wf_sequences->match_funct_arguments = match_funct_arguments; +} +void wavefront_sequences_init_packed2bits( + wavefront_sequences_t* const wf_sequences, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = reverse; + // Allocate buffers + wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length); + // Copy internal sequences + wavefront_sequences_init_decode2bits(wf_sequences->pattern_buffer, + pattern,pattern_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse); + wf_sequences->pattern_buffer_length = pattern_length; + wavefront_sequences_init_decode2bits(wf_sequences->text_buffer, + text,text_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse); + wf_sequences->text_buffer_length = text_length; + // Set pattern + wf_sequences->pattern = wf_sequences->pattern_buffer; + wf_sequences->pattern_begin = 0; + wf_sequences->pattern_length = 
pattern_length; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length]; + // Set text + wf_sequences->text = wf_sequences->text_buffer; + wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + wf_sequences->text_eos = wf_sequences->text[text_length]; +} +/* + * Accessors + */ +bool wavefront_sequences_cmp( + wavefront_sequences_t* const wf_sequences, + const int pattern_pos, + const int text_pos) { + // Select mode + if (wf_sequences->mode == wf_sequences_lambda) { + // Custom function to compare sequences + alignment_match_funct_t match_funct = wf_sequences->match_funct; + void* match_funct_arguments = wf_sequences->match_funct_arguments; + // Check coordinates (EOS) + const int pattern_length = wf_sequences->pattern_length; + const int text_length = wf_sequences->text_length; + if (pattern_pos >= pattern_length || text_pos >= text_length) return false; + // Compare using lambda (given coordinates) + const int pattern_begin = wf_sequences->pattern_begin; + const int text_begin = wf_sequences->text_begin; + if (wf_sequences->reverse) { + const int pattern_end = pattern_begin + pattern_length - 1; + const int text_end = text_begin + text_length - 1; + return match_funct(pattern_end-pattern_pos,text_end-text_pos,match_funct_arguments); + } else { + return match_funct(pattern_begin+pattern_pos,text_begin+text_pos,match_funct_arguments); + } + } else { + // Compare regular strings + return wf_sequences->pattern[pattern_pos] == wf_sequences->text[text_pos]; + } +} +char wavefront_sequences_get_pattern( + wavefront_sequences_t* const wf_sequences, + const int position) { + if (wf_sequences->mode == wf_sequences_lambda) { + return '-'; + } else { + return wf_sequences->pattern[position]; + } +} +char wavefront_sequences_get_text( + wavefront_sequences_t* const wf_sequences, + const int position) { + if (wf_sequences->mode == wf_sequences_lambda) { + return '-'; + } else { + return wf_sequences->text[position]; + } +} +/* + * Resize/Update 
+ */ +void wavefront_sequences_set_bounds( + wavefront_sequences_t* const wf_sequences, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end) { + // Select mode + if (wf_sequences->mode != wf_sequences_lambda) { + // Restore previous EOS char + const int pattern_length_old = wf_sequences->pattern_length; + const int text_length_old = wf_sequences->text_length; + wf_sequences->pattern[pattern_length_old] = wf_sequences->pattern_eos; + wf_sequences->text[text_length_old] = wf_sequences->text_eos; + // Focus on the new section of the sequences + if (wf_sequences->reverse) { + // Compare given coordinates + wf_sequences->pattern = wf_sequences->pattern_buffer + (wf_sequences->pattern_buffer_length - pattern_end); + wf_sequences->text = wf_sequences->text_buffer + (wf_sequences->text_buffer_length - text_end); + } else { + wf_sequences->pattern = wf_sequences->pattern_buffer + pattern_begin; + wf_sequences->text = wf_sequences->text_buffer + text_begin; + } + // Save EOS char and truncate sequence + const int pattern_length_new = pattern_end - pattern_begin; + const int text_length_new = text_end - text_begin; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length_new]; + wf_sequences->text_eos = wf_sequences->text[text_length_new]; + wf_sequences->pattern[pattern_length_new] = WF_SEQUENCES_PATTERN_EOS; + wf_sequences->text[text_length_new] = WF_SEQUENCES_TEXT_EOS; + } + // Set bounds + wf_sequences->pattern_begin = pattern_begin; + wf_sequences->pattern_length = pattern_end - pattern_begin; + wf_sequences->text_begin = text_begin; + wf_sequences->text_length = text_end - text_begin; +} + diff --git a/pywfa/WFA2_lib/wavefront/wavefront_sequences.h b/pywfa/WFA2_lib/wavefront/wavefront_sequences.h new file mode 100644 index 0000000..7e047c3 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_sequences.h @@ -0,0 +1,148 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by 
Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to encapsulate the input sequences + */ + +#ifndef WAVEFRONT_SEQUENCES_H_ +#define WAVEFRONT_SEQUENCES_H_ + +#include "utils/commons.h" + +/* + * Custom extend-match function, e.g.: + * + * typedef struct { + * char* pattern; + * int pattern_length; + * char* text; + * int text_length; + * } match_function_params_t; + * + * int match_function(int v,int h,void* arguments) { + * // Extract parameters + * match_function_params_t* match_arguments = (match_function_params_t*)arguments; + * // Check match + * if (v >= match_arguments->pattern_length || h >= match_arguments->text_length) return 0; + * return (match_arguments->pattern[v] == match_arguments->text[h]); + * } + */ +typedef int (*alignment_match_funct_t)(int,int,void*); + +/* + * Wavefront Sequences + */ +typedef enum { + wf_sequences_ascii = 0, + wf_sequences_lambda = 1, + wf_sequences_packed2bits = 2, +} wf_sequences_mode_t; +typedef struct { + // Mode + wf_sequences_mode_t mode; // Sequences mode + bool reverse; // Reverse sequences + // Current sequences & bounds + char* pattern; // Pointer to current pattern sequence (padded) + char* text; // Pointer to current text sequence (padded) + int pattern_begin; // Pattern begin offset + int pattern_length; // Pattern length + int text_begin; // Text begin offset + int text_length; // Text length + // Lambda Sequence + alignment_match_funct_t match_funct; // Custom matching function (match(v,h,args)) + void* match_funct_arguments; // Generic arguments passed to matching function (args) + // Internal buffers (ASCII encoded) + char* seq_buffer; // Internal buffer + int seq_buffer_allocated; // Internal buffer allocated + char* pattern_buffer; // Source pattern sequence + char* text_buffer; // Source text sequence + int pattern_buffer_length; // Source pattern length + int text_buffer_length; // Source text length + char pattern_eos; // Source pattern char at
EOS + char text_eos; // Source text char at EOS +} wavefront_sequences_t; + +/* + * Setup + */ +void wavefront_sequences_allocate( + wavefront_sequences_t* const wf_sequences); +void wavefront_sequences_free( + wavefront_sequences_t* const wf_sequences); + +/* + * Init Sequences + */ +void wavefront_sequences_init_ascii( + wavefront_sequences_t* const wf_sequences, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse); +void wavefront_sequences_init_lambda( + wavefront_sequences_t* const wf_sequences, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length, + const bool reverse); +void wavefront_sequences_init_packed2bits( + wavefront_sequences_t* const wf_sequences, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length, + const bool reverse); + + +/* + * Accessors + */ +bool wavefront_sequences_cmp( + wavefront_sequences_t* const wf_sequences, + const int pattern_pos, + const int text_pos); +char wavefront_sequences_get_pattern( + wavefront_sequences_t* const wf_sequences, + const int position); +char wavefront_sequences_get_text( + wavefront_sequences_t* const wf_sequences, + const int position); + +/* + * Resize/Update + */ +void wavefront_sequences_set_bounds( + wavefront_sequences_t* const wf_sequences, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end); + +#endif /* WAVEFRONT_SEQUENCES_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_slab.c b/pywfa/WFA2_lib/wavefront/wavefront_slab.c index 89a897b..98a4f3f 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_slab.c +++ b/pywfa/WFA2_lib/wavefront/wavefront_slab.c @@ -29,6 +29,8 @@ * DESCRIPTION: WaveFront Slab for fast pre-allocated wavefronts' memory handling */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_slab.h" /* @@ -286,6
+288,3 @@ uint64_t wavefront_slab_get_size( wavefront_slab_t* const wavefront_slab) { return wavefront_slab->memory_used; } - - - diff --git a/pywfa/WFA2_lib/wavefront/wavefront_slab.h b/pywfa/WFA2_lib/wavefront/wavefront_slab.h index 90f6bb6..9e1d245 100644 --- a/pywfa/WFA2_lib/wavefront/wavefront_slab.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_slab.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_SLAB_H_ #define WAVEFRONT_SLAB_H_ -#include "utils/commons.h" #include "utils/vector.h" #include "system/mm_allocator.h" #include "wavefront.h" diff --git a/pywfa/WFA2_lib/wavefront/wavefront_termination.c b/pywfa/WFA2_lib/wavefront/wavefront_termination.c new file mode 100644 index 0000000..e9e91bb --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_termination.c @@ -0,0 +1,162 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to check for the termination of an alignment + */ + +#include "wavefront_termination.h" + +/* + * Detect alignment termination (end of alignment) + */ +FORCE_NO_INLINE bool wavefront_termination_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int score_mod) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + const affine2p_matrix_type component_end = wf_aligner->component_end; + const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); + const wf_offset_t alignment_offset = DPMATRIX_OFFSET(text_length,pattern_length); + // Select end component + switch (component_end) { + case affine2p_matrix_M: { + // Check diagonal/offset + if (mwavefront->lo > alignment_k || alignment_k > mwavefront->hi) return false; // Not done + const wf_offset_t moffset = mwavefront->offsets[alignment_k]; + if (moffset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_I1: { + // Fetch I1-wavefront & check diagonal/offset + wavefront_t* const i1wavefront = wf_aligner->wf_components.i1wavefronts[score_mod]; + if (i1wavefront == NULL || i1wavefront->lo > alignment_k || alignment_k > i1wavefront->hi) return false; // Not done + const wf_offset_t i1offset = i1wavefront->offsets[alignment_k]; + if 
(i1offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_I2: { + // Fetch I2-wavefront & check diagonal/offset + wavefront_t* const i2wavefront = wf_aligner->wf_components.i2wavefronts[score_mod]; + if (i2wavefront == NULL || i2wavefront->lo > alignment_k || alignment_k > i2wavefront->hi) return false; // Not done + const wf_offset_t i2offset = i2wavefront->offsets[alignment_k]; + if (i2offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_D1: { + // Fetch D1-wavefront & check diagonal/offset + wavefront_t* const d1wavefront = wf_aligner->wf_components.d1wavefronts[score_mod]; + if (d1wavefront == NULL || d1wavefront->lo > alignment_k || alignment_k > d1wavefront->hi) return false; // Not done + const wf_offset_t d1offset = d1wavefront->offsets[alignment_k]; + if (d1offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_D2: { + // Fetch D2-wavefront & check diagonal/offset + wavefront_t* const d2wavefront = wf_aligner->wf_components.d2wavefronts[score_mod]; + if (d2wavefront == NULL || d2wavefront->lo > alignment_k || alignment_k > d2wavefront->hi) return false; // Not done + const wf_offset_t d2offset = d2wavefront->offsets[alignment_k]; + if (d2offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + 
wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + default: + break; + } + return false; +} +FORCE_NO_INLINE bool wavefront_termination_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int k, + const wf_offset_t offset) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + // Check ends-free reaching boundaries + const int h_pos = WAVEFRONT_H(k,offset); + const int v_pos = WAVEFRONT_V(k,offset); + if (h_pos >= text_length) { // Text is aligned + // Is Pattern end-free? + const int pattern_left = pattern_length - v_pos; + const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; + if (pattern_left <= pattern_end_free) { + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = k; + wf_aligner->alignment_end_pos.offset = offset; + } + return true; // Quit (we are done) + } + } + if (v_pos >= pattern_length) { // Pattern is aligned + // Is text end-free? 
+ const int text_left = text_length - h_pos; + const int text_end_free = wf_aligner->alignment_form.text_end_free; + if (text_left <= text_end_free) { + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = k; + wf_aligner->alignment_end_pos.offset = offset; + } + return true; // Quit (we are done) + } + } + // Not done + return false; +} diff --git a/pywfa/WFA2_lib/wavefront/wavefront_termination.h b/pywfa/WFA2_lib/wavefront/wavefront_termination.h new file mode 100644 index 0000000..05085ba --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_termination.h @@ -0,0 +1,57 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to check for the termination of an alignment + */ + +#ifndef WAVEFRONT_TERMINATION_H_ +#define WAVEFRONT_TERMINATION_H_ + +#include "wavefront_aligner.h" + +/* + * Necessary condition for ends-free termination + */ +#define WF_TERMINATION_ENDSFREE(h,v) ((h >= text_length) || (v >= pattern_length)) + +/* + * Detect alignment termination (end of alignment) + */ +bool wavefront_termination_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int score_mod); +bool wavefront_termination_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int k, + const wf_offset_t offset); + +#endif /* WAVEFRONT_TERMINATION_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wavefront_unialign.c b/pywfa/WFA2_lib/wavefront/wavefront_unialign.c new file mode 100644 index 0000000..a696e46 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wavefront_unialign.c @@ -0,0 +1,324 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + */ + +#include "utils/commons.h" +#include "system/mm_allocator.h" +#include "wavefront_unialign.h" +#include "wavefront.h" +#include "wavefront_attributes.h" +#include "wavefront_offset.h" +#include "wavefront_penalties.h" +#include "wavefront_plot.h" +#include "wavefront_slab.h" + +#include "wavefront_components.h" +#include "wavefront_compute.h" +#include "wavefront_compute_affine.h" +#include "wavefront_compute_affine2p.h" +#include "wavefront_compute_edit.h" +#include "wavefront_compute_linear.h" +#include "wavefront_extend.h" +#include "wavefront_backtrace.h" +#include "wavefront_backtrace_buffer.h" + +/* + * Initialize alignment + */ +void wavefront_unialign_init( + wavefront_aligner_t* const wf_aligner, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end) { + // Parameters + wavefront_align_status_t* const align_status = &wf_aligner->align_status; + alignment_form_t* const alignment_form = &wf_aligner->alignment_form; + const bool end2end = (alignment_form->span == alignment_end2end); + // Configure WF-compute function + switch (wf_aligner->penalties.distance_metric) { + case indel: + case edit: + align_status->wf_align_compute = &wavefront_compute_edit; + break; + case gap_linear: + align_status->wf_align_compute = &wavefront_compute_linear; + break; + case gap_affine: + align_status->wf_align_compute = &wavefront_compute_affine; + break; + case 
gap_affine_2p: + align_status->wf_align_compute = &wavefront_compute_affine2p; + break; + default: + fprintf(stderr,"[WFA] Distance function not implemented\n"); + exit(1); + break; + } + // Configure WF-extend function + if (end2end) { + align_status->wf_align_extend = &wavefront_extend_end2end; + } else { + align_status->wf_align_extend = &wavefront_extend_endsfree; + } + // Initialize wavefront-aligner (to perform a new alignment) + wf_aligner->component_begin = component_begin; + wf_aligner->component_end = component_end; + wavefront_aligner_init(wf_aligner,0); + // Clear cigar + cigar_clear(wf_aligner->cigar); +} +/* + * Limits + */ +bool wavefront_unialign_reached_limits( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Check alignment-score limit + if (score >= wf_aligner->system.max_alignment_steps) { + wf_aligner->cigar->score = -wf_aligner->system.max_alignment_steps; + wf_aligner->align_status.status = WF_STATUS_MAX_STEPS_REACHED; + wf_aligner->align_status.score = score; + return true; // Stop + } + // Global probing interval + alignment_system_t* const system = &wf_aligner->system; + if (score % system->probe_interval_global != 0) return false; // Continue + if (system->verbose >= 3) { + wavefront_unialign_print_status(stderr,wf_aligner,score); // DEBUG + } + // BT-Buffer + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + if (wf_components->bt_buffer!=NULL && (score%system->probe_interval_compact)==0) { + uint64_t bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); + // Check BT-buffer memory + if (bt_memory > system->max_memory_compact) { + // Compact BT-buffer + wavefront_components_compact_bt_buffer(wf_components,score,wf_aligner->system.verbose); + // Set new buffer limit + bt_memory = wf_backtrace_buffer_get_size_used(wf_components->bt_buffer); + uint64_t proposed_mem = (double)bt_memory * TELESCOPIC_FACTOR; + if (system->max_memory_compact < proposed_mem && proposed_mem < 
system->max_memory_abort) { + system->max_memory_compact = proposed_mem; // Raise the compaction threshold (telescopic growth, kept below the abort limit) + } + // Reset (if maximum compacts has been performed) + if (wf_components->bt_buffer->num_compactions >= system->max_partial_compacts) { + wf_backtrace_buffer_reset_compaction(wf_components->bt_buffer); + } + } + } + // Check overall memory used + const uint64_t wf_memory_used = wavefront_aligner_get_size(wf_aligner); + if (wf_memory_used > system->max_memory_abort) { + wf_aligner->align_status.status = WF_STATUS_OOM; + wf_aligner->align_status.score = score; + return true; // Stop + } + // Otherwise continue + return false; +} +/* + * Terminate alignment (backtrace) + */ +void wavefront_unialign_terminate( + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + wavefront_align_status_t* const align_status = &wf_aligner->align_status; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + cigar_t* const cigar = wf_aligner->cigar; + // Select alignment scope + align_status->score = score; + if (wf_aligner->alignment_scope == compute_score) { + // Set end-alignment position & score + if (align_status->status == WF_STATUS_END_REACHED) { + cigar->end_v = pattern_length; + cigar->end_h = text_length; + cigar->score = wavefront_compute_classic_score(wf_aligner,pattern_length,text_length,score); + align_status->status = WF_STATUS_ALG_COMPLETED; + } else { + const int k = wf_aligner->alignment_end_pos.k; + const int offset = wf_aligner->alignment_end_pos.offset; + cigar->end_v = WAVEFRONT_V(k,offset); + cigar->end_h = WAVEFRONT_H(k,offset); + cigar->score = wavefront_compute_classic_score(wf_aligner,cigar->end_v,cigar->end_h,score); + align_status->dropped = true; + align_status->status = WF_STATUS_ALG_PARTIAL; + } + } else { + // Parameters + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const int alignment_end_k =
wf_aligner->alignment_end_pos.k; + const wf_offset_t alignment_end_offset = wf_aligner->alignment_end_pos.offset; + if (alignment_end_offset != WAVEFRONT_OFFSET_NULL) { + if (wf_components->bt_piggyback) { + // Fetch wavefront + const bool memory_modular = wf_aligner->wf_components.memory_modular; + const int max_score_scope = wf_aligner->wf_components.max_score_scope; + const int score_mod = (memory_modular) ? score % max_score_scope : score; + wavefront_t* const mwavefront = wf_components->mwavefronts[score_mod]; + // Backtrace alignment from buffer (unpacking pcigar) + wavefront_backtrace_pcigar( + wf_aligner,alignment_end_k,alignment_end_offset, + mwavefront->bt_pcigar[alignment_end_k], + mwavefront->bt_prev[alignment_end_k]); + } else { + // Backtrace alignment + if (wf_aligner->penalties.distance_metric <= gap_linear) { + wavefront_backtrace_linear(wf_aligner, + score,alignment_end_k,alignment_end_offset); + } else { + wavefront_backtrace_affine(wf_aligner, + wf_aligner->component_begin,wf_aligner->component_end, + score,alignment_end_k,alignment_end_offset); + } + } + } + /* + * Post-processing (Extension-Trim, Score, and Ends) + * + * | Alignment-Regular | Alignment-Extension | + * |------------------------------------------------------------------------------| + * | END_REACHABLE | NoTrim + ALG_COMPLETED | Trim + ALG_PARTIAL|ALG_COMPLETED | + * |END_UNREACHABLE | Trim + ALG_PARTIAL | Trim + ALG_PARTIAL | + */ + const bool do_extension = wf_aligner->alignment_form.extension; + const bool unreachable = (align_status->status == WF_STATUS_END_UNREACHABLE); + align_status->dropped = unreachable; + if (do_extension || unreachable) { + // Alignment extension (maximal score) + const bool cigar_trimmed = wavefront_aligner_maxtrim_cigar(wf_aligner); + if (cigar_trimmed) { + align_status->status = WF_STATUS_ALG_PARTIAL; + } else { + align_status->status = (align_status->status == WF_STATUS_END_UNREACHABLE) ? 
+ WF_STATUS_ALG_PARTIAL : WF_STATUS_ALG_COMPLETED; + } + } else { + const int k = wf_aligner->alignment_end_pos.k; + const int offset = wf_aligner->alignment_end_pos.offset; + cigar->end_v = WAVEFRONT_V(k,offset); + cigar->end_h = WAVEFRONT_H(k,offset); + cigar->score = wavefront_compute_classic_score(wf_aligner,cigar->end_v,cigar->end_h,score); + // Set status + if (unreachable) { + align_status->status = WF_STATUS_ALG_PARTIAL; + } else { + align_status->status = WF_STATUS_ALG_COMPLETED; + } + } + } +} +/* + * Classic WF-Alignment (Unidirectional) + */ +int wavefront_unialign( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_align_status_t* const align_status = &wf_aligner->align_status; + void (*wf_align_compute)(wavefront_aligner_t* const,const int) = align_status->wf_align_compute; + int (*wf_align_extend)(wavefront_aligner_t* const,const int) = align_status->wf_align_extend; + // Compute wavefronts of increasing score + int score = align_status->score; + while (true) { + // Exact extend s-wavefront + const int finished = (*wf_align_extend)(wf_aligner,score); + if (finished) { + // DEBUG + // wavefront_aligner_print(stderr,wf_aligner,0,score,7,0); + if (align_status->status == WF_STATUS_END_REACHED || + align_status->status == WF_STATUS_END_UNREACHABLE) { + wavefront_unialign_terminate(wf_aligner,score); + } + return align_status->status; + } + // Compute (s+1)-wavefront + ++score; + (*wf_align_compute)(wf_aligner,score); + // Probe limits + if (wavefront_unialign_reached_limits(wf_aligner,score)) return align_status->status; + // Plot + if (wf_aligner->plot != NULL) wavefront_plot(wf_aligner,score,0); + // DEBUG + //wavefront_aligner_print(stderr,wf_aligner,score,score,7,0); + } + // Unreachable code + return WF_STATUS_OK; +} +/* + * Display + */ +void wavefront_unialign_print_status( + FILE* const stream, + wavefront_aligner_t* const wf_aligner, + const int score) { + // Parameters + wavefront_components_t* const wf_components = 
&wf_aligner->wf_components; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + // Approximate progress + const int dist_total = MAX(text_length,pattern_length); + int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score; + wavefront_t* wavefront = wf_components->mwavefronts[s]; + if (wavefront==NULL && s>0) { + s = (wf_components->memory_modular) ? (score-1)%wf_components->max_score_scope : (score-1); + wavefront = wf_components->mwavefronts[s]; + } + int dist_max = -1, wf_len = -1, k; + if (wavefront!=NULL) { + wf_offset_t* const offsets = wavefront->offsets; + for (k=wavefront->lo;k<=wavefront->hi;++k) { + const int dist = MAX(WAVEFRONT_V(k,offsets[k]),WAVEFRONT_H(k,offsets[k])); + dist_max = MAX(dist_max,dist); + } + wf_len = wavefront->hi-wavefront->lo+1; + } + // Memory used + const uint64_t slab_size = wavefront_slab_get_size(wf_aligner->wavefront_slab); + const uint64_t bt_buffer_used = (wf_components->bt_buffer) ? + wf_backtrace_buffer_get_size_used(wf_components->bt_buffer) : 0; + // Progress + const float aligned_progress = (dist_max>=0) ? (100.0f*(float)dist_max/(float)dist_total) : -1.0f; + const float million_offsets = (wf_len>=0) ? (float)wf_len/1000000.0f : -1.0f; + // Print one-line status + fprintf(stream,"["); + wavefront_aligner_print_mode(stream,wf_aligner); + fprintf(stream, + "] SequenceLength=(%d,%d) Score %d (~ %2.3f%% aligned). " + "MemoryUsed(WF-Slab,BT-buffer)=(%lu MB,%lu MB). 
" + "Wavefronts ~ %2.3f Moffsets\n", + pattern_length, + text_length, + score, + aligned_progress, + CONVERT_B_TO_MB(slab_size), + CONVERT_B_TO_MB(bt_buffer_used), + million_offsets); +} diff --git a/pywfa/WFA2_lib/utils/string_padded.h b/pywfa/WFA2_lib/wavefront/wavefront_unialign.h similarity index 55% rename from pywfa/WFA2_lib/utils/string_padded.h rename to pywfa/WFA2_lib/wavefront/wavefront_unialign.h index ee7a250..1f92953 100644 --- a/pywfa/WFA2_lib/utils/string_padded.h +++ b/pywfa/WFA2_lib/wavefront/wavefront_unialign.h @@ -26,54 +26,34 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: Padded string module to avoid handling corner conditions */ -#ifndef STRING_PADDED_H_ -#define STRING_PADDED_H_ +#ifndef WAVEFRONT_UNIALIGN_H_ +#define WAVEFRONT_UNIALIGN_H_ + +#include "wavefront_aligner.h" /* - * Includes + * Initialize alignment */ -#include "utils/commons.h" -#include "system/mm_allocator.h" +void wavefront_unialign_init( + wavefront_aligner_t* const wf_aligner, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end); /* - * Strings Padded + * Classic WF-Alignment (Unidirectional) */ -typedef struct { - // Dimensions - int pattern_length; - int text_length; - // Padded strings - char* pattern_padded; - char* text_padded; - // MM - char* pattern_padded_buffer; - char* text_padded_buffer; - mm_allocator_t* mm_allocator; -} strings_padded_t; +int wavefront_unialign( + wavefront_aligner_t* const wf_aligner); /* - * Strings (text/pattern) padded + * Display */ -strings_padded_t* strings_padded_new( - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const int padding_length, - const bool reverse_sequences, - mm_allocator_t* const mm_allocator); -strings_padded_t* strings_padded_new_rhomb( - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const int padding_length, 
- const bool reverse_sequences, - mm_allocator_t* const mm_allocator); -void strings_padded_delete( - strings_padded_t* const strings_padded); +void wavefront_unialign_print_status( + FILE* const stream, + wavefront_aligner_t* const wf_aligner, + const int current_score); + +#endif /* WAVEFRONT_UNIALIGN_H_ */ -#endif /* STRING_PADDED_H_ */ diff --git a/pywfa/WFA2_lib/wavefront/wfa.h b/pywfa/WFA2_lib/wavefront/wfa.h new file mode 100644 index 0000000..45379a9 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wfa.h @@ -0,0 +1,216 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront aligner external C-ABI functions with accompanying data structures + */ + +#pragma once + +#include "system/mm_allocator.h" +#include "wavefront_slab.h" +#include "wavefront_penalties.h" +#include "wavefront_attributes.h" +#include "wavefront_components.h" +#include "wavefront_sequences.h" +#include "wavefront_bialigner.h" + +/* + * Error codes & messages + */ +// [OK] +#define WF_STATUS_ALG_COMPLETED 0 // Success (Complete alignment found) +#define WF_STATUS_ALG_PARTIAL 1 // Success (Partial alignment found) +// [FAIL] +#define WF_STATUS_MAX_STEPS_REACHED -100 // Maximum number of WFA-steps reached +#define WF_STATUS_OOM -200 // Maximum memory limit reached +#define WF_STATUS_UNATTAINABLE -300 // Alignment unattainable under configured heuristics +// [INTERNAL] +#define WF_STATUS_OK -1 // Computing alignment (in progress) +#define WF_STATUS_END_REACHED -2 // Alignment end reached +#define WF_STATUS_END_UNREACHABLE -3 // Alignment end unreachable under current configuration (eg Z-drop) +// Error messages +char* wavefront_align_strerror(const int error_code); +char* wavefront_align_strerror_short(const int error_code); + +/* + * Alignment status + */ +typedef struct _wavefront_aligner_t wavefront_aligner_t; +typedef struct { + // Status + int status; // Status code + int score; // Current WF-alignment score + bool dropped; // Heuristically dropped + int num_null_steps; // Total contiguous null-steps performed + uint64_t memory_used; // Total memory used + // Wavefront alignment functions + void (*wf_align_compute)(wavefront_aligner_t* const,const int); // WF Compute function + int (*wf_align_extend)(wavefront_aligner_t* const,const int); // WF Extend function +} wavefront_align_status_t; + +/* + * Alignment type + */ +typedef enum { + wf_align_regular = 0, + wf_align_biwfa = 1, + wf_align_biwfa_breakpoint_forward = 2, + wf_align_biwfa_breakpoint_reverse = 3, 
+ wf_align_biwfa_subsidiary = 4 +} wavefront_align_mode_t; + +/* + * Wavefront Aligner + */ +typedef struct _wavefront_aligner_t { + // Mode and status + wavefront_align_mode_t align_mode; // WFA alignment mode + char* align_mode_tag; // WFA mode tag + wavefront_align_status_t align_status; // Current alignment status + // Sequences + wavefront_sequences_t sequences; // Input sequences + // Alignment Attributes + alignment_scope_t alignment_scope; // Alignment scope (score only or full-CIGAR) + alignment_form_t alignment_form; // Alignment form (end-to-end/ends-free) + wavefront_penalties_t penalties; // Alignment penalties + wavefront_heuristic_t heuristic; // Heuristic's parameters + wavefront_memory_t memory_mode; // Wavefront memory strategy (modular wavefronts and piggyback) + // Wavefront components + wavefront_components_t wf_components; // Wavefront components + affine2p_matrix_type component_begin; // Alignment begin component + affine2p_matrix_type component_end; // Alignment end component + wavefront_pos_t alignment_end_pos; // Alignment end position + // Bidirectional Alignment + wavefront_bialigner_t* bialigner; // BiWFA aligner + // CIGAR + cigar_t* cigar; // Alignment CIGAR + // MM + bool mm_allocator_own; // Ownership of MM-Allocator + mm_allocator_t* mm_allocator; // MM-Allocator + wavefront_slab_t* wavefront_slab; // MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) + // Display + wavefront_plot_t* plot; // Wavefront plot + // System + alignment_system_t system; // System related parameters +} wavefront_aligner_t; + +/* + * Setup + */ +wavefront_aligner_t* wavefront_aligner_new( + wavefront_aligner_attr_t* attributes); +void wavefront_aligner_reap( + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_delete( + wavefront_aligner_t* const wf_aligner); + +/* + * Span configuration + */ +void wavefront_aligner_set_alignment_end_to_end( + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_set_alignment_free_ends( 
+ wavefront_aligner_t* const wf_aligner, + const int pattern_begin_free, + const int pattern_end_free, + const int text_begin_free, + const int text_end_free); +void wavefront_aligner_set_alignment_extension( + wavefront_aligner_t* const wf_aligner); + +/* + * Heuristic configuration + */ +void wavefront_aligner_set_heuristic_none( + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_set_heuristic_wfadaptive( + wavefront_aligner_t* const wf_aligner, + const int min_wavefront_length, + const int max_distance_threshold, + const int score_steps); +void wavefront_aligner_set_heuristic_wfmash( + wavefront_aligner_t* const wf_aligner, + const int min_wavefront_length, + const int max_distance_threshold, + const int score_steps); +void wavefront_aligner_set_heuristic_xdrop( + wavefront_aligner_t* const wf_aligner, + const int xdrop, + const int score_steps); +void wavefront_aligner_set_heuristic_zdrop( + wavefront_aligner_t* const wf_aligner, + const int ydrop, + const int score_steps); +void wavefront_aligner_set_heuristic_banded_static( + wavefront_aligner_t* const wf_aligner, + const int band_min_k, + const int band_max_k); +void wavefront_aligner_set_heuristic_banded_adaptive( + wavefront_aligner_t* const wf_aligner, + const int band_min_k, + const int band_max_k, + const int score_steps); + +/* + * System configuration + */ +void wavefront_aligner_set_max_alignment_steps( + wavefront_aligner_t* const wf_aligner, + const int max_alignment_steps); +void wavefront_aligner_set_max_memory( + wavefront_aligner_t* const wf_aligner, + const uint64_t max_memory_resident, + const uint64_t max_memory_abort); +void wavefront_aligner_set_max_num_threads( + wavefront_aligner_t* const wf_aligner, + const int max_num_threads); +void wavefront_aligner_set_min_offsets_per_thread( + wavefront_aligner_t* const wf_aligner, + const int min_offsets_per_thread); + +/* + * Wavefront Align + */ +int wavefront_align( + wavefront_aligner_t* const wf_aligner, + const char* const 
pattern, + const int pattern_length, + const char* const text, + const int text_length); +int wavefront_align_lambda( + wavefront_aligner_t* const wf_aligner, + alignment_match_funct_t const match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +int wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); diff --git a/pywfa/WFA2_lib/wavefront/wfa.hpp b/pywfa/WFA2_lib/wavefront/wfa.hpp new file mode 100644 index 0000000..7908b97 --- /dev/null +++ b/pywfa/WFA2_lib/wavefront/wfa.hpp @@ -0,0 +1,36 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront aligner external C-ABI functions with accompanying data structures + */ + +#pragma once + +extern "C" { + #include "wfa.h" +} diff --git a/pywfa/WFA_wrap.pxd b/pywfa/WFA_wrap.pxd index 05e84e2..ac91b17 100644 --- a/pywfa/WFA_wrap.pxd +++ b/pywfa/WFA_wrap.pxd @@ -1,10 +1,20 @@ #cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False -from libc.stdint cimport uint64_t +from libc.stdint cimport uint8_t, int32_t, uint32_t, uint64_t from libc.stdio cimport FILE +from posix.time cimport timespec -cdef extern from "WFA2_lib/wavefront/wavefront_penalties.h" nogil: +cdef extern from "WFA2_lib/utils/vector.h" nogil: + # Data Structures + ctypedef struct vector_t: + void* memory + uint64_t used + uint64_t element_size + uint64_t elements_allocated + + +cdef extern from "WFA2_lib/alignment/linear_penalties.h" nogil: ctypedef struct linear_penalties_t: int match # (Penalty representation usually M <= 0) int mismatch # (Penalty representation usually X > 0) @@ -44,10 +54,9 @@ cdef extern from "WFA2_lib/alignment/affine2p_penalties.h" nogil: affine2p_matrix_D1 affine2p_matrix_D2 - cdef extern from "WFA2_lib/wavefront/wavefront_heuristic.h" nogil: # Wavefront ahead definition - # ctypedef struct _wavefront_aligner_t wavefront_aligner_t + ctypedef _wavefront_aligner_t wavefront_aligner_t # Wavefront Heuristics ctypedef enum wf_heuristic_strategy: @@ -57,15 +66,16 @@ cdef extern from "WFA2_lib/wavefront/wavefront_heuristic.h" nogil: wf_heuristic_wfadaptive = 0x0000000000000004ul wf_heuristic_xdrop = 0x0000000000000010ul wf_heuristic_zdrop = 0x0000000000000020ul + wf_heuristic_wfmash = 0x0000000000000040ul ctypedef struct wavefront_heuristic_t: # Heuristic wf_heuristic_strategy strategy # Heuristic strategy int steps_between_cutoffs # Score-steps between heuristic cut-offs - # Banded + # Static/Adaptive Banded int min_k # Banded: Minimum k to consider 
in band int max_k # Banded: Maximum k to consider in band - # Adaptive + # WFAdaptive int min_wavefront_length # Adaptive: Minimum wavefronts length to cut-off int max_distance_threshold # Adaptive: Maximum distance between offsets allowed # Drops @@ -74,17 +84,49 @@ cdef extern from "WFA2_lib/wavefront/wavefront_heuristic.h" nogil: # Internals int steps_wait # Score-steps until next cut-off int max_sw_score # Maximum score observed (for x/z drops) + int max_wf_score # Corresponding WF-score (to max_sw_score) int max_sw_score_offset # Offset of the maximum score observed int max_sw_score_k # Diagonal of the maximum score observed + # Setup + void wavefront_heuristic_set_none( + wavefront_heuristic_t* const wf_heuristic) -cdef extern from "WFA2_lib/utils/vector.h" nogil: - # Data Structures - ctypedef struct vector_t: - void* memory - uint64_t used - uint64_t element_size - uint64_t elements_allocated + void wavefront_heuristic_set_wfadaptive( + wavefront_heuristic_t* const wf_heuristic, + const int min_wavefront_length, + const int max_distance_threshold, + const int steps_between_cutoffs) + void wavefront_heuristic_set_wfmash( + wavefront_heuristic_t* const wf_heuristic, + const int min_wavefront_length, + const int max_distance_threshold, + const int steps_between_cutoffs) + + void wavefront_heuristic_set_xdrop( + wavefront_heuristic_t* const wf_heuristic, + const int xdrop, + const int steps_between_cutoffs) + void wavefront_heuristic_set_zdrop( + wavefront_heuristic_t* const wf_heuristic, + const int ydrop, + const int steps_between_cutoffs) + + void wavefront_heuristic_set_banded_static( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k) + void wavefront_heuristic_set_banded_adaptive( + wavefront_heuristic_t* const wf_heuristic, + const int band_min_k, + const int band_max_k, + const int steps_between_cutoffs) + + # Wavefront heuristic cut-off + bint wavefront_heuristic_cufoff( + wavefront_aligner_t* const wf_aligner, 
+ const int score, + const int score_mod) cdef extern from "WFA2_lib/system/mm_allocator.h" nogil: @@ -105,6 +147,208 @@ cdef extern from "WFA2_lib/system/mm_allocator.h" nogil: mm_allocator_t* const mm_allocator, void* const memory) +cdef extern from "WFA2_lib/system/profiler_counter.h" nogil: + # Counters + ctypedef struct profiler_counter_t: + uint64_t total + uint64_t samples + uint64_t min + uint64_t max + double m_oldM + double m_newM + double m_oldS + double m_newS + + void counter_reset( + profiler_counter_t* const counter) + void counter_add( + profiler_counter_t* const counter, + const uint64_t amount) + + uint64_t counter_get_total(const profiler_counter_t* const counter) + uint64_t counter_get_num_samples(const profiler_counter_t* const counter) + uint64_t counter_get_min(const profiler_counter_t* const counter) + uint64_t counter_get_max(const profiler_counter_t* const counter) + double counter_get_mean(const profiler_counter_t* const counter) + double counter_get_variance(const profiler_counter_t* const counter) + double counter_get_stddev(const profiler_counter_t* const counter) + + void counter_combine_sum( + profiler_counter_t* const counter_dst, + profiler_counter_t* const counter_src) + + void counter_print( + FILE* const stream, + const profiler_counter_t* const counter, + const profiler_counter_t* const ref_counter, + const char* const units, + const bint full_report) + void percentage_print( + FILE* const stream, + const profiler_counter_t* const counter, + const char* const units) + + # Reference Counter (Counts wrt a reference counter. 
Eg ranks) + ctypedef struct profiler_rcounter_t: + uint64_t begin_count # Counter + profiler_counter_t counter # Total count & samples taken + uint64_t accumulated # Total accumulated + + void rcounter_start( + profiler_rcounter_t* const rcounter, + const uint64_t reference) + void rcounter_stop( + profiler_rcounter_t* const rcounter, + const uint64_t reference) + void rcounter_pause( + profiler_rcounter_t* const rcounter, + const uint64_t reference) + void rcounter_restart( + profiler_rcounter_t* const rcounter, + const uint64_t reference) + void rcounter_reset( + profiler_rcounter_t* const rcounter) + + uint64_t rcounter_get_total(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_num_samples(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_min(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_max(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_mean(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_variance(profiler_rcounter_t* const rcounter) + uint64_t rcounter_get_stddev(profiler_rcounter_t* const rcounter) + +cdef extern from "WFA2_lib/system/profiler_timer.h" nogil: + # System time + void timer_get_system_time(timespec *ts); + + # Timers + ctypedef struct profiler_timer_t: + # Timer + timespec begin_timer; # Timer begin + # Total time & samples taken + profiler_counter_t time_ns; + uint64_t accumulated; + + void timer_start(profiler_timer_t* const timer); + void timer_stop(profiler_timer_t* const timer); + void timer_pause(profiler_timer_t* const timer); + void timer_continue(profiler_timer_t* const timer); + void timer_reset(profiler_timer_t* const timer); + + uint64_t timer_get_current_lap_ns(profiler_timer_t* const timer); + uint64_t timer_get_current_total_ns(profiler_timer_t* const timer); + uint64_t timer_get_total_ns(const profiler_timer_t* const timer); + uint64_t timer_get_num_samples(const profiler_timer_t* const timer); + uint64_t timer_get_min_ns(const profiler_timer_t* const timer); + 
uint64_t timer_get_max_ns(const profiler_timer_t* const timer); + uint64_t timer_get_mean(const profiler_timer_t* const timer); + uint64_t timer_get_variance(const profiler_timer_t* const timer); + uint64_t timer_get_stddev(const profiler_timer_t* const timer); + + void timer_print_total( + FILE* const stream, + const profiler_timer_t* const timer); + + void timer_print( + FILE* const stream, + const profiler_timer_t* const timer, + const profiler_timer_t* const ref_timer); + +cdef extern from "WFA2_lib/utils/heatmap.h" nogil: + # Heatmap + ctypedef enum heatmap_type: + heatmap_min # Min value stays + heatmap_max # Max value stays + heatmap_value # Last value set stays + ctypedef struct heatmap_t: + # Configuration + heatmap_type type; + # Dimensions + int num_rows; + int num_columns; + # Range + int min_v; + int max_v; + int min_h; + int max_h; + float binning_factor; + # Data + int** values; + + # Setup + heatmap_t* heatmap_new( + const heatmap_type type, + const int min_v, + const int max_v, + const int min_h, + const int max_h, + const int resolution_points) + void heatmap_clear( + heatmap_t* const heatmap) + void heatmap_delete( + heatmap_t* const heatmap) + + # Accessors + void heatmap_set( + heatmap_t* const heatmap, + const int v, + const int h, + const int value) + + # Display + void heatmap_print( + FILE* const stream, + heatmap_t* const heatmap) + +cdef extern from "WFA2_lib/wavefront/wavefront_plot.h" nogil: + # Wavefront ahead definition + # ctypedef _wavefront_aligner_t wavefront_aligner_t + + # Wavefront Display + ctypedef struct wavefront_plot_attr_t: + bint enabled # Is plotting enabled + int resolution_points # Total resolution points + int align_level # Level of recursion to plot (-1 == final) + ctypedef struct wavefront_plot_t: + # Configuration + wavefront_plot_attr_t attributes + distance_metric_t distance_metric + int min_v + int max_v + int min_h + int max_h + # Wavefront Heatmaps + heatmap_t* m_heatmap + heatmap_t* i1_heatmap + heatmap_t* 
d1_heatmap + heatmap_t* i2_heatmap + heatmap_t* d2_heatmap + heatmap_t* behavior_heatmap + + # Setup + wavefront_plot_t* wavefront_plot_new( + const distance_metric_t distance_metric, + const int pattern_length, + const int text_length, + wavefront_plot_attr_t* const attributes) + void wavefront_plot_resize( + wavefront_plot_t* const wf_plot, + const int pattern_length, + const int text_length) + void wavefront_plot_delete( + wavefront_plot_t* const wf_plot) + + # Plot record state + void wavefront_plot( + wavefront_aligner_t* const wf_aligner, + const int score, + const int align_level) + + # Display/Dump + void wavefront_plot_print( + FILE* const stream, + wavefront_aligner_t* const wf_aligner) + cdef extern from "WFA2_lib/wavefront/wavefront_attributes.h" nogil: @@ -120,22 +364,18 @@ cdef extern from "WFA2_lib/wavefront/wavefront_attributes.h" nogil: ctypedef struct alignment_form_t: # Mode alignment_span_t span # Alignment form (End-to-end/Ends-free) + # Extension + bint extension # Activate extension-like alignment # Ends-free int pattern_begin_free # Allow free-gap at the beginning of the pattern int pattern_end_free # Allow free-gap at the end of the pattern int text_begin_free # Allow free-gap at the beginning of the text int text_end_free # Allow free-gap at the end of the text - # Limits - int max_alignment_score # Maximum score allowed before quit - - # Custom extend-match function - ctypedef int (*alignment_match_funct_t)(int,int,void*) # Alignment system configuration - ctypedef struct alignment_system_t: - # Debug - bint check_alignment_correct # Verify that the alignment CIGAR output is correct + # Limits + int max_alignment_steps # Maximum WFA-steps allowed before quit # Probing intervals int probe_interval_global # Score-ticks interval to check any limits int probe_interval_compact # Score-ticks interval to check BT-buffer compacting @@ -146,19 +386,26 @@ cdef extern from "WFA2_lib/wavefront/wavefront_attributes.h" nogil: uint64_t max_memory_abort 
# Maximum memory allowed to be used before aborting alignment # Verbose # 0 - Quiet - # 1 - Report WFA progress and heavy tasks - # 2 - Report each sequence aligned (brief) - # 3 - Report each sequence aligned (very verbose) + # 1 - Report each sequence aligned (brief) + # 2 - Report each sequence/subsequence aligned (brief) + # 3 - Report WFA progress (heavy tasks) (verbose) + # 4 - Full report of each sequence/subsequence aligned (very verbose) int verbose # Verbose (regulates messages during alignment) + # Debug + bint check_alignment_correct # Verify that the alignment CIGAR output is correct # Profile - # profiler_timer_t timer # Time alignment + profiler_timer_t timer # Time alignment + # OS + int max_num_threads # Maximum number of threads to use to compute/extend WFs + int min_offsets_per_thread # Minumum amount of offsets to spawn a thread # Low-memory modes ctypedef enum wavefront_memory_t: wavefront_memory_high = 0 # High-memore mode (fastest, stores all WFs explicitly) - wavefront_memory_med = 1 # Succing-memory mode (medium, offloads half-full BT-blocks) - wavefront_memory_low = 2 # Succing-memory mode (slow, offloads only full BT-blocks) + wavefront_memory_med = 1 # Succing-memory mode piggyback-based (medium, offloads half-full BT-blocks) + wavefront_memory_low = 2 # Succing-memory mode piggyback-based (slow, offloads only full BT-blocks) + wavefront_memory_ultralow = 3 # Bidirectional WFA #Wavefront Aligner Attributes @@ -175,61 +422,82 @@ cdef extern from "WFA2_lib/wavefront/wavefront_attributes.h" nogil: wavefront_heuristic_t heuristic # Wavefront heuristic # Memory model wavefront_memory_t memory_mode # Wavefront memory strategy (modular wavefronts and piggyback) - # Custom function to compare sequences - alignment_match_funct_t match_funct # Custom matching function (match(v,h,args)) - void* match_funct_arguments # Generic arguments passed to matching function (args) # External MM (instead of allocating one inside) mm_allocator_t* mm_allocator # 
MM-Allocator # Display - # wavefront_plot_params_t plot_params # Wavefront plot + wavefront_plot_attr_t plot # Plot wavefront # System alignment_system_t system # System related parameters + # Default parameters + cdef extern wavefront_aligner_attr_t wavefront_aligner_attr_default + cdef extern from "WFA2_lib/wavefront/wavefront_penalties.h" nogil: # Distance metrics ctypedef enum distance_metric_t: - indel = 0, # Longest Common Subsequence - LCS - edit = 1, # Levenshtein - gap_linear = 2, # Needleman-Wunsch - gap_affine = 3, # Smith-Waterman-Gotoh - gap_affine_2p = 4 # Concave 2-pieces - - # Penalty adaptation strategy - ctypedef enum wf_penalties_strategy_type: - wavefronts_penalties_force_zero_match - wavefronts_penalties_shifted_penalties + indel = 0, # Longest Common Subsequence - LCS + edit = 1, # Levenshtein + gap_linear = 2, # Needleman-Wunsch + gap_affine = 3, # Smith-Waterman-Gotoh + gap_affine_2p = 4 # Gap-Affine 2-pieces # Wavefront Penalties - ctypedef struct wavefronts_penalties_t: - distance_metric_t distance_metric # Alignment metric/distance used - # int match # (M = 0) - int mismatch # (X > 0) - int gap_opening1 # (O1 > 0) - int gap_extension1 # (E1 > 0) - int gap_opening2 # (O2 > 0) - int gap_extension2 # (E2 > 0) + ctypedef struct wavefront_penalties_t: + distance_metric_t distance_metric # Alignment metric/distance used + int match # (M <= 0) + int mismatch # (X > 0) + int gap_opening1 # (O1 >= 0) + int gap_extension1 # (E1 > 0) + int gap_opening2 # (O2 >= 0) + int gap_extension2 # (E2 > 0) + # Internals + linear_penalties_t linear_penalties # Original gap-linear penalties + affine_penalties_t affine_penalties # Original gap-affine penalties + affine2p_penalties_t affine2p_penalties # Original gap-affine2p penalties + int internal_gap_e # Original gap-extension value (used for z-drop) + + # Penalties adjustment + void wavefront_penalties_set_indel( + wavefront_penalties_t* const wf_penalties) + void wavefront_penalties_set_edit( + 
wavefront_penalties_t* const wf_penalties) + void wavefront_penalties_set_linear( + wavefront_penalties_t* const wf_penalties, + linear_penalties_t* const linear_penalties) + void wavefront_penalties_set_affine( + wavefront_penalties_t* const wf_penalties, + affine_penalties_t* const affine_penalties) + void wavefront_penalties_set_affine2p( + wavefront_penalties_t* const wf_penalties, + affine2p_penalties_t* const affine2p_penalties) + # Display + void wavefront_penalties_print( + FILE* const stream, + wavefront_penalties_t* const wf_penalties) cdef extern from "WFA2_lib/alignment/cigar.h" nogil: #CIGAR ctypedef struct cigar_t: - # Operations buffer - char* operations - int max_operations - int begin_offset - int end_offset + # Alignment operations + char* operations # Raw alignment operations + int max_operations # Maximum buffer size + int begin_offset # Begin offset + int end_offset # End offset # Score - int score - # MM - mm_allocator_t* mm_allocator + int score # Computed score + int end_v # Alignment-end vertical coordinate (pattern characters aligned) + int end_h # Alignment-end horizontal coordinate (text characters aligned) + # CIGAR (SAM compliant) + bint has_misms # Show 'X' and '=' instead of just 'M' + uint32_t* cigar_buffer # CIGAR-operations (max operations length) + int cigar_length # total CIGAR operations # Setup - # void cigar_allocate( - # cigar_t* const cigar, - # const int max_operations, - # mm_allocator_t* const mm_allocator) + cigar_t* cigar_new( + const int max_operations) void cigar_clear( cigar_t* const cigar) void cigar_resize( @@ -239,15 +507,38 @@ cdef extern from "WFA2_lib/alignment/cigar.h" nogil: cigar_t* const cigar) # Accessors - int cigar_get_matches( + bint cigar_is_null( cigar_t* const cigar) - void cigar_add_mismatches( - char* const pattern, - const int pattern_length, - char* const text, - const int text_length, + + int cigar_count_matches( cigar_t* const cigar) + void cigar_append_forward( + cigar_t* const cigar_dst, + 
cigar_t* const cigar_src + ) + void cigar_append_reverse( + cigar_t* const cigar_dst, + cigar_t* const cigar_src + ) + + void cigar_append_deletion( + cigar_t* const cigar, + const int length + ) + void cigar_append_insertion( + cigar_t* const cigar, + const int length + ) + + # SAM-compliant CIGAR + void cigar_get_CIGAR( + cigar_t* const cigar, + const bint show_mismatches, + uint32_t** const cigar_buffer, + int* const cigar_length + ) + # Score int cigar_score_edit( cigar_t* const cigar) @@ -268,6 +559,29 @@ cdef extern from "WFA2_lib/alignment/cigar.h" nogil: void cigar_copy( cigar_t* const cigar_dst, cigar_t* const cigar_src) + + void cigar_discover_mismatches( + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + cigar_t* const cigar + ) + + bint cigar_maxtrim_gap_linear( + cigar_t* const cigar, + linear_penalties_t* const penalties + ) + bint cigar_maxtrim_gap_affine( + cigar_t* const cigar, + affine_penalties_t* const penalties + ) + bint cigar_maxtrim_gap_affine2p( + cigar_t* const cigar, + affine2p_penalties_t* const penalties + ) + + # Check bint cigar_check_alignment( FILE* const stream, const char* const pattern, @@ -286,82 +600,620 @@ cdef extern from "WFA2_lib/alignment/cigar.h" nogil: char* buffer, cigar_t* const cigar, const bint print_matches) + + void cigar_print_SAM_CIGAR( + FILE* const stream, + cigar_t* const cigar, + const bint show_mismatches) + void cigar_sprint_SAM_CIGAR( + char* const buffer, + cigar_t* const cigar, + const bint show_mismatches) + void cigar_print_pretty( FILE* const stream, + cigar_t* const cigar, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length) + + +cdef extern from "WFA2_lib/wavefront/wavefront_sequences.h" nogil: + ctypedef int (*alignment_match_funct_t)(int,int,void*) + + # Wavefront sequences + ctypedef enum wf_sequences_mode_t: + wf_sequences_ascii = 0 + wf_sequences_lambda = 1 + 
wf_sequences_packed2bits = 2 + ctypedef struct wavefront_sequences_t: + # Mode + wf_sequences_mode_t mode # Sequences mode + bint reverse # Reverse sequences + # Current sequences & bounds + char* pattern # Pointer to current pattern sequence (padded) + char* text # Pointer to current text sequence (padded) + int pattern_begin # Pattern begin offset + int pattern_length # Pattern length + int text_begin # Text begin offset + int text_length # Text length + # Lambda Sequence + alignment_match_funct_t match_funct # Custom matching function (match(v,h,args)) + void* match_funct_arguments # Generic arguments passed to matching function (args) + # Internal buffers (ASCII encoded) + char* seq_buffer # Internal buffer + int seq_buffer_allocated # Internal buffer allocated + char* pattern_buffer # Source pattern sequence + char* text_buffer # Source text sequence + int pattern_buffer_length # Source pattern length + int text_buffer_length # Source text length + char pattern_eos # Source pattern char at EOS + char text_eos # Source text char at EOS + + # Setup + void wavefront_sequences_allocate( + wavefront_sequences_t* const wf_sequences); + void wavefront_sequences_free( + wavefront_sequences_t* const wf_sequences); + + # Init Sequences + void wavefront_sequences_init_ascii( + wavefront_sequences_t* const wf_sequences, const char* const pattern, const int pattern_length, const char* const text, const int text_length, - cigar_t* const cigar, + const bint reverse); + void wavefront_sequences_init_lambda( + wavefront_sequences_t* const wf_sequences, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length, + const bint reverse); + void wavefront_sequences_init_packed2bits( + wavefront_sequences_t* const wf_sequences, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length, + const bint reverse); + +cdef extern from 
"WFA2_lib/wavefront/wavefront_offset.h" nogil: + ctypedef int32_t wf_offset_t + ctypedef uint32_t wf_unsigned_offset_t + +cdef extern from "WFA2_lib/wavefront/wavefront_pcigar.h" nogil: + ctypedef uint32_t pcigar_t; + # Accessors + int pcigar_get_length( + const pcigar_t pcigar); + int pcigar_unpack( + pcigar_t pcigar, + char* cigar_buffer); + + # PCIGAR unpack + void pcigar_unpack_linear( + pcigar_t pcigar, + wavefront_sequences_t* const sequences, + int* const v_pos, + int* const h_pos, + char* cigar_buffer, + int* const cigar_length); + void pcigar_unpack_affine( + pcigar_t pcigar, + wavefront_sequences_t* const sequences, + int* const v_pos, + int* const h_pos, + char* cigar_buffer, + int* const cigar_length, + affine_matrix_type* const current_matrix_type); + + # Display + void pcigar_print( + FILE* const stream, + pcigar_t pcigar); + +cdef extern from "WFA2_lib/utils/bitmap.h" nogil: + # Bitmap + ctypedef struct bitmap_block_t: + uint64_t counter + uint64_t bitmap + ctypedef struct bitmap_t: + # Bitmap + uint64_t num_blocks + bitmap_block_t* bitmap_blocks + # MM + mm_allocator_t* mm_allocator + + # Setup + bitmap_t* bitmap_new( + const uint64_t length, + mm_allocator_t* const mm_allocator) + void bitmap_delete( + bitmap_t* const bitmap) + + # Accessors + void bitmap_set( + bitmap_t* const bitmap, + const uint64_t pos) + bint bitmap_is_set( + bitmap_t* const bitmap, + const uint64_t pos) + bint bitmap_check__set( + bitmap_t* const bitmap, + const uint64_t pos) + + # Rank + void bitmap_update_counters( + bitmap_t* const bitmap) + uint64_t bitmap_erank( + bitmap_t* const bitmap, + const uint64_t pos) + + +cdef extern from "WFA2_lib/wavefront/wavefront_backtrace_buffer.h" nogil: + # Separated Backtrace Block + ctypedef uint32_t bt_block_idx_t # Up to 2^31 references (~32GB of not-compactable pCIGARs) + + ctypedef packed struct bt_block_t: + pcigar_t pcigar # Packed CIGAR + bt_block_idx_t prev_idx # Index of the previous BT-block + + # Backtrace initial positions 
+ ctypedef struct wf_backtrace_init_pos_t: + int v + int h + + # Backtrace Buffer + ctypedef struct wf_backtrace_buffer_t: + # Locator + int segment_idx # Current segment idx + int segment_offset # Current free position within segment + bt_block_t* block_next # Next BT-block free + # Buffers # + vector_t* segments # Memory segments (bt_block_t*) + vector_t* alignment_init_pos # Buffer to store alignment's initial coordinates (h,v) (wf_backtrace_init_pos_t) + bt_block_idx_t num_compacted_blocks # Total compacted blocks in BT-buffer compacted (dense from 0..num_compacted_blocks-1) + int num_compactions # Total compactions performed + # Internal buffers # + vector_t* alignment_packed # Temporal buffer to store final alignment (pcigar_t) + vector_t* prefetch_blocks_idxs # Temporal buffer to store blocks_idxs (bt_block_idx_t) + # MM + mm_allocator_t* mm_allocator + + # Setup + wf_backtrace_buffer_t* wf_backtrace_buffer_new( + mm_allocator_t* const mm_allocator) + void wf_backtrace_buffer_clear( + wf_backtrace_buffer_t* const bt_buffer) + void wf_backtrace_buffer_reap( + wf_backtrace_buffer_t* const bt_buffer) + void wf_backtrace_buffer_delete( + wf_backtrace_buffer_t* const bt_buffer) + + # Accessors + void wf_backtrace_buffer_add_used( + wf_backtrace_buffer_t* const bt_buffer, + const int used) + bt_block_idx_t wf_backtrace_buffer_get_mem( + wf_backtrace_buffer_t* const bt_buffer, + bt_block_t** const bt_block_mem, + int* const bt_blocks_available) + + # Store blocks + bt_block_idx_t wf_backtrace_buffer_init_block( + wf_backtrace_buffer_t* const bt_buffer, + const int v, + const int h) + + # Unpack CIGAR + bt_block_t* wf_backtrace_buffer_traceback_pcigar( + wf_backtrace_buffer_t* const bt_buffer, + bt_block_t* bt_block) + void wf_backtrace_buffer_unpack_cigar_linear( + wf_backtrace_buffer_t* const bt_buffer, + wavefront_sequences_t* const sequences, + const int begin_v, + const int begin_h, + const int end_v, + const int end_h, + cigar_t* const cigar) + void 
wf_backtrace_buffer_unpack_cigar_affine( + wf_backtrace_buffer_t* const bt_buffer, + wavefront_sequences_t* const sequences, + const int begin_v, + const int begin_h, + const int end_v, + const int end_h, + cigar_t* const cigar) + + # Compact + void wf_backtrace_buffer_mark_backtrace( + wf_backtrace_buffer_t* const bt_buffer, + const bt_block_idx_t bt_block_idx, + bitmap_t* const bitmap) + void wf_backtrace_buffer_mark_backtrace_batch( + wf_backtrace_buffer_t* const bt_buffer, + wf_offset_t* const offsets, + bt_block_idx_t* const bt_block_idxs, + const int num_block_idxs, + bitmap_t* const bitmap) + + bt_block_idx_t wf_backtrace_buffer_compact_marked( + wf_backtrace_buffer_t* const bt_buffer, + bitmap_t* const bitmap, + const int verbose) + + # Utils + uint64_t wf_backtrace_buffer_get_used( + wf_backtrace_buffer_t* const bt_buffer) + + bt_block_idx_t wf_backtrace_buffer_get_num_compacted_blocks( + wf_backtrace_buffer_t* const bt_buffer) + void wf_backtrace_buffer_set_num_compacted_blocks( + wf_backtrace_buffer_t* const bt_buffer, + const bt_block_idx_t num_compacted_blocks) + void wf_backtrace_buffer_reset_compaction( + wf_backtrace_buffer_t* const bt_buffer) + + uint64_t wf_backtrace_buffer_get_size_allocated( + wf_backtrace_buffer_t* const bt_buffer) + uint64_t wf_backtrace_buffer_get_size_used( + wf_backtrace_buffer_t* const bt_buffer) + + +cdef extern from "WFA2_lib/wavefront/wavefront.h" nogil: + # Alignment position + ctypedef struct wavefront_pos_t: + int score # Score + int k # Diagonal + wf_offset_t offset # Offset + + # Wavefront + ctypedef enum wavefront_status_type: + wavefront_status_free + wavefront_status_busy + wavefront_status_deallocated + ctypedef struct wavefront_t: + # Dimensions + bint null # Is null interval? 
+ int lo # Lowest diagonal (inclusive) + int hi # Highest diagonal (inclusive) + # Wavefront elements # + wf_offset_t* offsets # Offsets (k-centered) + wf_offset_t* offsets_mem # Offsets base memory (Internal) + # Piggyback backtrace # + int bt_occupancy_max # Maximum number of pcigar-ops stored on the Backtrace-block + pcigar_t* bt_pcigar # Backtrace-block pcigar (k-centered) + bt_block_idx_t* bt_prev # Backtrace-block previous-index (k-centered) + pcigar_t* bt_pcigar_mem # Backtrace-block (base memory - Internal) + bt_block_idx_t* bt_prev_mem # Backtrace-block previous-index (base memory - Internal) + # Slab internals # + wavefront_status_type status # Wavefront status (memory state) + int wf_elements_allocated # Total wf-elements allocated (max. wf. size) + int wf_elements_allocated_min # Minimum diagonal-element wf-element allocated + int wf_elements_allocated_max # Maximum diagonal-element wf-element allocated + int wf_elements_init_min # Minimum diagonal-element initialized (inclusive) + int wf_elements_init_max # Maximum diagonal-element initialized (inclusive) + + # Wavefront Set + ctypedef struct wavefront_set_t: + # In Wavefronts + wavefront_t* in_mwavefront_misms + wavefront_t* in_mwavefront_open1 + wavefront_t* in_mwavefront_open2 + wavefront_t* in_i1wavefront_ext + wavefront_t* in_i2wavefront_ext + wavefront_t* in_d1wavefront_ext + wavefront_t* in_d2wavefront_ext + # Out Wavefronts + wavefront_t* out_mwavefront + wavefront_t* out_i1wavefront + wavefront_t* out_i2wavefront + wavefront_t* out_d1wavefront + wavefront_t* out_d2wavefront + + # Setup + void wavefront_allocate( + wavefront_t* const wavefront, + const int wf_elements_allocated, + const bint allocate_backtrace, + mm_allocator_t* const mm_allocator) + void wavefront_resize( + wavefront_t* const wavefront, + const int wf_elements_allocated, + mm_allocator_t* const mm_allocator) + void wavefront_free( + wavefront_t* const wavefront, + mm_allocator_t* const mm_allocator) + + # Initialization + void 
wavefront_init( + wavefront_t* const wavefront, + const int min_lo, + const int max_hi) + void wavefront_init_null( + wavefront_t* const wavefront, + const int min_lo, + const int max_hi) + void wavefront_init_victim( + wavefront_t* const wavefront, + const int min_lo, + const int max_hi) + + # Accessors + void wavefront_set_limits( + wavefront_t* const wavefront, + const int lo, + const int hi) + + # Utils + uint64_t wavefront_get_size( + wavefront_t* const wavefront) + + +cdef extern from "WFA2_lib/wavefront/wavefront_components.h" nogil: + # Wavefront Components + ctypedef struct wavefront_components_t: + # Configuration + bint memory_modular # Memory strategy (modular wavefronts) + bint bt_piggyback # Backtrace Piggyback + # Wavefronts dimensions # + int num_wavefronts # Total number of allocated wavefronts + int max_score_scope # Maximum score-difference between dependent wavefronts + int historic_max_hi # Maximum WF hi-limit seen during current alignment + int historic_min_lo # Minimum WF lo-limit seen during current alignment + # Wavefronts # + wavefront_t** mwavefronts # M-wavefronts + wavefront_t** i1wavefronts # I1-wavefronts + wavefront_t** i2wavefronts # I2-wavefronts + wavefront_t** d1wavefronts # D1-wavefronts + wavefront_t** d2wavefronts # D2-wavefronts + wavefront_t* wavefront_null # Null wavefront (orthogonal reading) + wavefront_t* wavefront_victim # Dummy wavefront (orthogonal writing) + # BT-Buffer # + wf_backtrace_buffer_t* bt_buffer # Backtrace Buffer + # MM # + mm_allocator_t* mm_allocator # MM-Allocator + + # Setup + void wavefront_components_allocate( + wavefront_components_t* const wf_components, + const int max_pattern_length, + const int max_text_length, + wavefront_penalties_t* const penalties, + const bint memory_modular, + const bint bt_piggyback, + mm_allocator_t* const mm_allocator) + void wavefront_components_reap( + wavefront_components_t* const wf_components) + void wavefront_components_clear( + wavefront_components_t* const 
wf_components) + void wavefront_components_free( + wavefront_components_t* const wf_components) + + # Resize + void wavefront_components_resize( + wavefront_components_t* const wf_components, + const int max_pattern_length, + const int max_text_length, + wavefront_penalties_t* const penalties) + void wavefront_components_resize_null__victim( + wavefront_components_t* const wf_components, + const int lo, + const int hi) + + # Compact + void wavefront_components_compact_bt_buffer( + wavefront_components_t* const wf_components, + const int score, + const int verbose) + + +cdef extern from "WFA2_lib/wavefront/wavefront_bialigner.h" nogil: + ctypedef struct wf_bialign_breakpoint_t: + # Scores + int score # Score total + int score_forward # Score (forward) + int score_reverse # Score (reverse) + # Location # + int k_forward # Breakpoint diagonal (forward) + int k_reverse # Breakpoint diagonal (reverse) + wf_offset_t offset_forward # Offset (forward) + wf_offset_t offset_reverse # Offset (reverse) + affine2p_matrix_type component # Component (M/I/D) + + ctypedef struct wavefront_bialigner_t: + # Wavefronts + wavefront_aligner_t* wf_forward # Breakpoint Forward aligner + wavefront_aligner_t* wf_reverse # Breakpoint Reverse aligner + wavefront_aligner_t* wf_base # Base/Subsidiary aligner + # Operators + void (*wf_align_compute)(wavefront_aligner_t* const,const int) + + # Setup + wavefront_bialigner_t* wavefront_bialigner_new( + wavefront_aligner_attr_t* const attributes, + wavefront_plot_t* const plot) + void wavefront_bialigner_reap( + wavefront_bialigner_t* const wf_bialigner) + void wavefront_bialigner_delete( + wavefront_bialigner_t* const wf_bialigner) + + # Sequences + void wavefront_bialigner_set_sequences_ascii( + wavefront_bialigner_t* const wf_bialigner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length) + void wavefront_bialigner_set_sequences_lambda( + wavefront_bialigner_t* const wf_bialigner, + 
alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length) + void wavefront_bialigner_set_sequences_packed2bits( + wavefront_bialigner_t* const wf_bialigner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length) + void wavefront_bialigner_set_sequences_bounds( + wavefront_bialigner_t* const wf_bialigner, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end) + + # Accessors + uint64_t wavefront_bialigner_get_size( + wavefront_bialigner_t* const wf_bialigner) + void wavefront_bialigner_set_heuristic( + wavefront_bialigner_t* const wf_bialigner, + wavefront_heuristic_t* const heuristic) + void wavefront_bialigner_set_max_alignment_steps( + wavefront_bialigner_t* const wf_bialigner, + const int max_alignment_steps) + void wavefront_bialigner_set_max_memory( + wavefront_bialigner_t* const wf_bialigner, + const uint64_t max_memory_resident, + const uint64_t max_memory_abort) + void wavefront_bialigner_set_max_num_threads( + wavefront_bialigner_t* const wf_bialigner, + const int max_num_threads) + void wavefront_bialigner_set_min_offsets_per_thread( + wavefront_bialigner_t* const wf_bialigner, + const int min_offsets_per_thread) + + +cdef extern from "WFA2_lib/wavefront/wavefront_slab.h" nogil: + ctypedef enum wf_slab_mode_t: + wf_slab_reuse = 1 # Keep all wavefronts (Reap only by demand) + wf_slab_tight = 2 # Reap all if wavefronts are resized + ctypedef struct wavefront_slab_t: + # Attributes + bint allocate_backtrace # WFs require BT-vector + wf_slab_mode_t slab_mode # Slab strategy + # Wavefront Slabs # + int init_wf_length # Initial wf-elements allocated + int current_wf_length # Current wf-elements allocated + vector_t* wavefronts # All wavefronts (wavefront_t*) + vector_t* wavefronts_free # Free wavefronts (wavefront_t*) + # Stats # + uint64_t memory_used # Memory used (Bytes) + # MM # + 
mm_allocator_t* mm_allocator # MM-Allocator + + # Setup + wavefront_slab_t* wavefront_slab_new( + const int init_wf_length, + const bint allocate_backtrace, + const wf_slab_mode_t slab_mode, mm_allocator_t* const mm_allocator) + void wavefront_slab_reap( + wavefront_slab_t* const wavefront_slab) + void wavefront_slab_clear( + wavefront_slab_t* const wavefront_slab) + void wavefront_slab_delete( + wavefront_slab_t* const wavefront_slab) + # Accessors + void wavefront_slab_set_mode( + wavefront_slab_t* const wavefront_slab, + const wf_slab_mode_t slab_mode) + + # Allocator + wavefront_t* wavefront_slab_allocate( + wavefront_slab_t* const wavefront_slab, + const int min_lo, + const int max_hi) + void wavefront_slab_free( + wavefront_slab_t* const wavefront_slab, + wavefront_t* const wavefront) + + # Utils + uint64_t wavefront_slab_get_size( + wavefront_slab_t* const wavefront_slab) -cdef extern from "WFA2_lib/wavefront/wavefront_aligner.h" nogil: +cdef extern from "WFA2_lib/wavefront/wfa.h" nogil: # Error codes & messages - DEF WF_STATUS_SUCCESSFUL = 0 - DEF WF_STATUS_IN_PROGRESS = 1 - DEF WF_STATUS_HEURISTICALY_DROPPED = -1 - DEF WF_STATUS_MAX_SCORE_REACHED = -2 - DEF WF_STATUS_OOM = -3 - extern char* wf_error_msg[5] + # [OK] + DEF WF_STATUS_ALG_COMPLETED = 0 # Success (Complete alignment found) + DEF WF_STATUS_ALG_PARTIAL = 1 # Success (Partial alignment found) + # [FAIL] + DEF WF_STATUS_MAX_STEPS_REACHED = -100 # Maximum number of WFA-steps reached + DEF WF_STATUS_OOM = -200 # Maximum memory limit reached + DEF WF_STATUS_UNATTAINABLE = -300 # Alignment unattainable under configured heuristics + # [INTERNAL] + DEF WF_STATUS_OK = -1 # Computing alignment (in progress) + DEF WF_STATUS_END_REACHED = -2 # Alignment end reached + DEF WF_STATUS_END_UNREACHABLE = -3 # Alignment end unreachable under current configuration (e.g. 
Z-drop) + + # error messages char* wavefront_align_strerror(const int wf_error_code) + char* wavefront_align_strerror_short (const int error_code) # Alignment status ctypedef struct _wavefront_aligner_t - ctypedef _wavefront_aligner_t wavefront_aligner_t + # ctypedef _wavefront_aligner_t wavefront_aligner_t ctypedef struct wavefront_align_status_t: # Status int status # Status code int score # Current WF-alignment score + bint dropped # Heuristically dropped + int num_null_steps # Total contiguous null-steps performed + uint64_t memory_used # Total memory used # Wavefront alignment functions void (*wf_align_compute)(wavefront_aligner_t* const,const int) # WF Compute function bint (*wf_align_extend)(wavefront_aligner_t* const,const int) # WF Extend function + # Alignment type + ctypedef enum wavefront_align_mode_t: + wf_align_regular = 0 + wf_align_biwfa = 1 + wf_align_biwfa_breakpoint_forward = 2 + wf_align_biwfa_breakpoint_reverse = 3 + wf_align_biwfa_subsidiary = 4 + # Wavefront Aligner ctypedef struct _wavefront_aligner_t: - # Status + # Mode and Status + wavefront_align_mode_t align_mode # WFA alignment mode + char* align_mode_tag # WFA mode tag wavefront_align_status_t align_status # Current alignment status # Sequences - # strings_padded_t* sequences # Padded sequences - char* pattern # Pattern sequence (padded) - int pattern_length # Pattern length - char* text # Text sequence (padded) - int text_length # Text length + wavefront_sequences_t* sequences # Input sequences # Alignment Attributes alignment_scope_t alignment_scope # Alignment scope (score only or full-CIGAR) alignment_form_t alignment_form # Alignment form (end-to-end/ends-free) - wavefronts_penalties_t penalties # Alignment penalties + wavefront_penalties_t penalties # Alignment penalties wavefront_heuristic_t heuristic # Heuristic's parameters wavefront_memory_t memory_mode # Wavefront memory strategy (modular wavefronts and piggyback) - # Custom function to compare sequences - # 
alignment_match_funct_t match_funct # Custom matching function (match(v,h,args)) - # void* match_funct_arguments # Generic arguments passed to matching function (args) # Wavefront components - # wavefront_components_t wf_components # Wavefront components + wavefront_components_t wf_components # Wavefront components + affine2p_matrix_type component_begin # Alignment begin component + affine2p_matrix_type component_end # Alignment end component + wavefront_pos_t alignment_end_pos # alignment end position + # Bidirectional alignment + wavefront_bialigner_t* bialigner # BiWFA aligner # CIGAR - cigar_t cigar # Alignment CIGAR + cigar_t* cigar # Alignment CIGAR # MM - # bint mm_allocator_own # Ownership of MM-Allocator + bint mm_allocator_own # Ownership of MM-Allocator mm_allocator_t* mm_allocator # MM-Allocator - # wavefront_slab_t* wavefront_slab # MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) + wavefront_slab_t* wavefront_slab # MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) # Display - # wavefront_plot_params_t plot_params # Wavefront plot parameters - # wavefront_plot_t wf_plot # Wavefront plot + wavefront_plot_t plot # Wavefront plot # System - # alignment_system_t system # System related parameters - - # ctypedef _wavefront_aligner_t wavefront_aligner_t + alignment_system_t system # System related parameters # Setup wavefront_aligner_t* wavefront_aligner_new( wavefront_aligner_attr_t* attributes) - void wavefront_aligner_resize( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length) void wavefront_aligner_reap( wavefront_aligner_t* const wf_aligner) void wavefront_aligner_delete( @@ -376,20 +1228,18 @@ cdef extern from "WFA2_lib/wavefront/wavefront_aligner.h" nogil: const int pattern_end_free, const int text_begin_free, const int text_end_free) + void wavefront_aligner_set_alignment_extension( + wavefront_aligner_t* const wf_aligner) # 
Heuristic configuration void wavefront_aligner_set_heuristic_none( wavefront_aligner_t* const wf_aligner) - void wavefront_aligner_set_heuristic_banded_static( - wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k) - void wavefront_aligner_set_heuristic_banded_adaptive( + void wavefront_aligner_set_heuristic_wfadaptive( wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k, + const int min_wavefront_length, + const int max_distance_threshold, const int score_steps) - void wavefront_aligner_set_heuristic_wfadaptive( + void wavefront_aligner_set_heuristic_wfmash( wavefront_aligner_t* const wf_aligner, const int min_wavefront_length, const int max_distance_threshold, @@ -402,170 +1252,48 @@ cdef extern from "WFA2_lib/wavefront/wavefront_aligner.h" nogil: wavefront_aligner_t* const wf_aligner, const int ydrop, const int score_steps) - - # Match-funct configuration - void wavefront_aligner_set_match_funct( + void wavefront_aligner_set_heuristic_banded_static( + wavefront_aligner_t* const wf_aligner, + const int band_min_k, + const int band_max_k) + void wavefront_aligner_set_heuristic_banded_adaptive( wavefront_aligner_t* const wf_aligner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments) + const int band_min_k, + const int band_max_k, + const int score_steps) # System configuration - void wavefront_aligner_set_max_alignment_score( + void wavefront_aligner_set_max_alignment_steps( wavefront_aligner_t* const wf_aligner, - const int max_alignment_score) + const int max_alignment_steps) void wavefront_aligner_set_max_memory( wavefront_aligner_t* const wf_aligner, - const uint64_t max_memory_compact, const uint64_t max_memory_resident, const uint64_t max_memory_abort) - - # Utils - uint64_t wavefront_aligner_get_size( - wavefront_aligner_t* const wf_aligner) - - # /* - # * Display - # */ - # void wavefront_aligner_print_status( - # FILE* const stream, - # wavefront_aligner_t* const 
wf_aligner, - # const int current_score) - - -cdef extern from "WFA2_lib/wavefront/wavefront_heuristic.h" nogil: - # Wavefront ahead definition - # ctypedef struct _wavefront_aligner_t wavefront_aligner_t - - # Wavefront Heuristics - # ctypedef enum wf_heuristic_strategy: - # wf_heuristic_none = 0x0000000000000000ul - # wf_heuristic_banded_static = 0x0000000000000001ul - # wf_heuristic_banded_adaptive = 0x0000000000000002ul - # wf_heuristic_wfadaptive = 0x0000000000000004ul - # wf_heuristic_xdrop = 0x0000000000000010ul - # wf_heuristic_zdrop = 0x0000000000000020ul - # - # ctypedef struct wavefront_heuristic_t: - # # Heuristic - # wf_heuristic_strategy strategy # Heuristic strategy - # int steps_between_cutoffs # Score-steps between heuristic cut-offs - # # Banded - # int min_k # Banded: Minimum k to consider in band - # int max_k # Banded: Maximum k to consider in band - # # Adaptive - # int min_wavefront_length # Adaptive: Minimum wavefronts length to cut-off - # int max_distance_threshold # Adaptive: Maximum distance between offsets allowed - # # Drops - # int xdrop # X-drop parameter - # int zdrop # Z-drop parameter - # # Internals - # int steps_wait # Score-steps until next cut-off - # int max_sw_score # Maximum score observed (for x/z drops) - # int max_sw_score_offset # Offset of the maximum score observed - # int max_sw_score_k # Diagonal of the maximum score observed - - # Setup - void wavefront_heuristic_set_none( - wavefront_heuristic_t* const wf_heuristic) - void wavefront_heuristic_set_banded_static( - wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k) - void wavefront_heuristic_set_banded_adaptive( - wavefront_heuristic_t* const wf_heuristic, - const int band_min_k, - const int band_max_k, - const int steps_between_cutoffs) - void wavefront_heuristic_set_wfadaptive( - wavefront_heuristic_t* const wf_heuristic, - const int min_wavefront_length, - const int max_distance_threshold, - const int steps_between_cutoffs) 
- void wavefront_heuristic_set_xdrop( - wavefront_heuristic_t* const wf_heuristic, - const int xdrop, - const int steps_between_cutoffs) - void wavefront_heuristic_set_zdrop( - wavefront_heuristic_t* const wf_heuristic, - const int ydrop, - const int steps_between_cutoffs) - - void wavefront_heuristic_clear( - wavefront_heuristic_t* const wf_heuristic) - - # Wavefront heuristic cut-off - bint wavefront_heuristic_cufoff( + void wavefront_aligner_set_max_num_threads( wavefront_aligner_t* const wf_aligner, - const int score) - - - # Default parameters -cdef extern from "WFA2_lib/wavefront/wavefront_attributes.c" nogil: - cdef extern wavefront_aligner_attr_t wavefront_aligner_attr_default - - - -cdef extern from "WFA2_lib/wavefront/wavefront_penalties.h" nogil: - - # Distance metrics - # ctypedef enum distance_metric_t: - # indel = 0, # Longest Common Subsequence - LCS - # edit = 1, # Levenshtein - # gap_linear = 2, # Needleman-Wunsch - # gap_affine = 3, # Smith-Waterman-Gotoh - # gap_affine_2p = 4 # Concave 2-pieces - # - # # Penalty adaptation strategy - # ctypedef enum wf_penalties_strategy_type: - # wavefronts_penalties_force_zero_match - # wavefronts_penalties_shifted_penalties - # - # # Wavefront Penalties - # ctypedef struct wavefronts_penalties_t: - # distance_metric_t distance_metric # Alignment metric/distance used - # # int match # (M = 0) - # int mismatch # (X > 0) - # int gap_opening1 # (O1 > 0) - # int gap_extension1 # (E1 > 0) - # int gap_opening2 # (O2 > 0) - # int gap_extension2 # (E2 > 0) - - # Penalties adjustment - void wavefronts_penalties_set_indel( - wavefronts_penalties_t* const wavefronts_penalties) - void wavefronts_penalties_set_edit( - wavefronts_penalties_t* const wavefronts_penalties) - void wavefronts_penalties_set_linear( - wavefronts_penalties_t* const wavefronts_penalties, - linear_penalties_t* const linear_penalties, - const wf_penalties_strategy_type penalties_strategy) - void wavefronts_penalties_set_affine( - 
wavefronts_penalties_t* const wavefronts_penalties, - affine_penalties_t* const affine_penalties, - const wf_penalties_strategy_type penalties_strategy) - void wavefronts_penalties_set_affine2p( - wavefronts_penalties_t* const wavefronts_penalties, - affine2p_penalties_t* const affine2p_penalties, - const wf_penalties_strategy_type penalties_strategy) - - # Display - # void wavefronts_penalties_print( - # FILE* const stream, - # wavefronts_penalties_t* const wavefronts_penalties) - - # Display - # void wavefront_heuristic_print( - # FILE* const stream, - # wavefront_heuristic_t* const wf_heuristic) - + const int max_num_threads) + void wavefront_aligner_set_min_offsets_per_thread( + wavefront_aligner_t* const wf_aligner, + const int min_offsets_per_thread) -cdef extern from "WFA2_lib/wavefront/wavefront_align.h" nogil: - # Wavefront Alignment + # Wavefront Align int wavefront_align( wavefront_aligner_t* const wf_aligner, const char* const pattern, const int pattern_length, const char* const text, const int text_length) - int wavefront_align_resume( - wavefront_aligner_t* const wf_aligner) + int wavefront_align_lambda( + wavefront_aligner_t* const wf_aligner, + const alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length) + int wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length) + diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 42be30d..d34e010 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -430,8 +430,7 @@ cdef class WavefrontAligner: outfile = fopen(fname, "w") else: outfile = stdout - wfa.cigar_print_pretty(outfile, p, len(p), t, len(t), &self.wf_aligner.cigar, - self.wf_aligner.mm_allocator) + wfa.cigar_print_pretty(outfile, self.wf_aligner.cigar, p, len(p), t, len(t)) if file_name: fclose(outfile) @@ -450,7 +449,7 @@ cdef class WavefrontAligner: cdef char 
last_op cdef int last_op_length, i, length - cigar = &self.wf_aligner.cigar + cigar = self.wf_aligner.cigar # Check null CIGAR if cigar.begin_offset >= cigar.end_offset: @@ -478,7 +477,7 @@ cdef class WavefrontAligner: cdef char last_op cdef int last_op_length, i, length - cigar = &self.wf_aligner.cigar + cigar = self.wf_aligner.cigar # Check null CIGAR if cigar.begin_offset >= cigar.end_offset: From 59f35ddd5d7e7081f0c13750f4e064c5edfc502c Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Mon, 11 Dec 2023 16:20:26 +0100 Subject: [PATCH 02/11] implement wildcard functionality --- pywfa/align.pxd | 1 + pywfa/align.pyx | 47 ++++++++++++++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/pywfa/align.pxd b/pywfa/align.pxd index 390f3dd..ad8aa1c 100644 --- a/pywfa/align.pxd +++ b/pywfa/align.pxd @@ -8,5 +8,6 @@ cdef class WavefrontAligner: cdef wfa.wavefront_aligner_attr_t* attributes cdef wfa.wavefront_aligner_t* wf_aligner cdef str _pattern, _text + cdef char _wildcard cdef bint score_only cdef public int match_score, alignment_score, text_len, pattern_len diff --git a/pywfa/align.pyx b/pywfa/align.pyx index d34e010..319445e 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -300,6 +300,14 @@ cpdef cigartuples_to_str(cigartuples): cigarstring += f"{l}{str_codes[opp]}" return cigarstring +ctypedef struct wildcard_fun_args: + char* pattern + char* query + char wildcard + +cdef int wildcard_match_fun(int pattern_pos, int query_pos, void* argsptr) noexcept nogil: + cdef const wildcard_fun_args* args = argsptr + return args[0].pattern[pattern_pos] == args[0].wildcard or args[0].query[query_pos] == args[0].wildcard or args[0].pattern[pattern_pos] == args[0].query[query_pos] cdef class WavefrontAligner: """Wrapper class for WFA2-lib. 
If a pattern is supplied, it will be cached for re-use @@ -326,16 +334,23 @@ cdef class WavefrontAligner: int max_distance_threshold=50, int steps_between_cutoffs=1, int xdrop=20, + wildcard=None ): self.pattern_len = 0 self.text_len = 0 if pattern: - self._pattern = pattern + self._pattern = pattern.upper() # could get a malloc version working # self.attributes = malloc(sizeof(wfa.wavefront_aligner_attr_default)) self.attributes = &wfa.wavefront_aligner_attr_default + if wildcard is not None: + if not isinstance(wildcard, str): + raise TypeError(f"expected wildcard to be a string, but it is {type(wildcard)}") + if len(wildcard) > 1: + raise ValueError(f"wildcard must have length 1, but has length {len(wildcard)}") + self._wildcard = wildcard.upper().encode("ascii")[0] if distance == "affine": self.attributes.distance_metric = wfa.gap_affine @@ -354,16 +369,14 @@ cdef class WavefrontAligner: self.attributes.affine2p_penalties.gap_opening2 = gap_opening2 self.attributes.affine2p_penalties.gap_extension2 = gap_extension2 else: - print(NotImplementedError(f'{distance} distance not implemented')) - # raise NotImplementedError(f'{distance} distance not implemented') + raise NotImplementedError(f'{distance} distance not implemented') if scope == "full": self.attributes.alignment_scope = wfa.compute_alignment elif scope == "score": self.attributes.alignment_scope = wfa.compute_score self.score_only = True else: - print(ValueError(f'{scope} scope not understood')) - # raise ValueError(f'{scope} scope not understood') + raise ValueError(f'{scope} scope not understood') self.attributes.alignment_form.pattern_begin_free = pattern_begin_free self.attributes.alignment_form.pattern_end_free = pattern_end_free @@ -375,8 +388,7 @@ cdef class WavefrontAligner: elif span == "end-to-end": self.attributes.alignment_form.span = wfa.alignment_end2end else: - print(NotImplementedError(f'{span} span not implemented')) - # raise NotImplementedError(f'{span} span not implemented') + raise 
NotImplementedError(f'{span} span not implemented') if heuristic is None: self.attributes.heuristic.strategy = wfa.wf_heuristic_none @@ -390,8 +402,7 @@ cdef class WavefrontAligner: self.attributes.heuristic.xdrop = xdrop self.attributes.heuristic.steps_between_cutoffs = steps_between_cutoffs else: - print(NotImplementedError(f'{heuristic} heuristic not implemented')) - # raise NotImplementedError(f'{heuristic} heuristic not implemented') + raise NotImplementedError(f'{heuristic} heuristic not implemented') self.wf_aligner = wfa.wavefront_aligner_new(self.attributes) @@ -405,17 +416,18 @@ cdef class WavefrontAligner: :return: Alignment score :rtype: int """ - cdef bytes p if pattern is not None: - p = pattern.encode('ascii') - self._pattern = pattern - else: - p = self._pattern.encode('ascii') - cdef bytes t = text.encode('ascii') + self._pattern = pattern.upper() + cdef bytes p = self._pattern.encode('ascii') + cdef bytes t = text.upper().encode('ascii') self._text = text self.text_len = len(t) self.pattern_len = len(p) - wfa.wavefront_align(self.wf_aligner, p, len(p), t, len(text)) + if not self._wildcard: + wfa.wavefront_align(self.wf_aligner, p, len(p), t, len(text)) + else: + args = wildcard_fun_args(p, t, self._wildcard) + wfa.wavefront_align_lambda(self.wf_aligner, wildcard_match_fun, &args, len(p), len(text)) return self.wf_aligner.cigar.score def cigar_print_pretty(self, file_name=None): @@ -594,4 +606,5 @@ cdef class WavefrontAligner: return res def __dealloc__(self): - wfa.wavefront_aligner_delete(self.wf_aligner) + if self.wf_aligner: # if an exception is raised in the constructor, self.wf_aligner does not exist yet + wfa.wavefront_aligner_delete(self.wf_aligner) From ef7946f78416b104e4f72e17a28ffc5c6caa84eb Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Mon, 11 Dec 2023 16:36:53 +0100 Subject: [PATCH 03/11] cache the bytestring pattern as well --- pywfa/align.pxd | 1 + pywfa/align.pyx | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 
deletions(-) diff --git a/pywfa/align.pxd b/pywfa/align.pxd index ad8aa1c..60763a5 100644 --- a/pywfa/align.pxd +++ b/pywfa/align.pxd @@ -8,6 +8,7 @@ cdef class WavefrontAligner: cdef wfa.wavefront_aligner_attr_t* attributes cdef wfa.wavefront_aligner_t* wf_aligner cdef str _pattern, _text + cdef bytes _bpattern cdef char _wildcard cdef bint score_only cdef public int match_score, alignment_score, text_len, pattern_len diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 319445e..88907fb 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -341,6 +341,8 @@ cdef class WavefrontAligner: self.text_len = 0 if pattern: self._pattern = pattern.upper() + self._bpattern = self._pattern.encode("ascii") + self.pattern_len = len(self._bpattern) # could get a malloc version working # self.attributes = malloc(sizeof(wfa.wavefront_aligner_attr_default)) @@ -418,21 +420,20 @@ cdef class WavefrontAligner: """ if pattern is not None: self._pattern = pattern.upper() - cdef bytes p = self._pattern.encode('ascii') + self._bpattern = self._pattern.encode("ascii") + self.pattern_len = len(self._bpattern) cdef bytes t = text.upper().encode('ascii') self._text = text self.text_len = len(t) - self.pattern_len = len(p) if not self._wildcard: - wfa.wavefront_align(self.wf_aligner, p, len(p), t, len(text)) + wfa.wavefront_align(self.wf_aligner, self._bpattern, len(self._bpattern), t, len(text)) else: - args = wildcard_fun_args(p, t, self._wildcard) - wfa.wavefront_align_lambda(self.wf_aligner, wildcard_match_fun, &args, len(p), len(text)) + args = wildcard_fun_args(self._bpattern, t, self._wildcard) + wfa.wavefront_align_lambda(self.wf_aligner, wildcard_match_fun, &args, len(self._bpattern), len(text)) return self.wf_aligner.cigar.score def cigar_print_pretty(self, file_name=None): cdef bytes t = self._text.encode('ascii') - cdef bytes p = self._pattern.encode('ascii') cdef bytes fname_bytes cdef char* fname cdef FILE * outfile @@ -442,7 +443,7 @@ cdef class WavefrontAligner: outfile = 
fopen(fname, "w") else: outfile = stdout - wfa.cigar_print_pretty(outfile, self.wf_aligner.cigar, p, len(p), t, len(t)) + wfa.cigar_print_pretty(outfile, self.wf_aligner.cigar, self._bpattern, len(self._bpattern), t, len(t)) if file_name: fclose(outfile) From 322c6954038b16fcdd39ed25c06a8e4d754c80f3 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Mon, 11 Dec 2023 16:56:10 +0100 Subject: [PATCH 04/11] implement linear penalty --- pywfa/align.pyx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 88907fb..c03e2b6 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -317,6 +317,7 @@ cdef class WavefrontAligner: distance="affine", int match=0, int mismatch=4, + int indel=2, int gap_opening=6, int gap_extension=2, int gap_opening2=24, @@ -354,7 +355,13 @@ cdef class WavefrontAligner: raise ValueError(f"wildcard must have length 1, but has length {len(wildcard)}") self._wildcard = wildcard.upper().encode("ascii")[0] - if distance == "affine": + if distance == "linear": + self.attributes.distance_metric = wfa.gap_linear + self.attributes.linear_penalties.match = match + self.match_score = match + self.attributes.linear_penalties.mismatch = mismatch + self.attributes.linear_penalties.indel = indel + elif distance == "affine": self.attributes.distance_metric = wfa.gap_affine self.attributes.affine_penalties.match = match self.match_score = match From 5ecba94b332db8119ffd4a13b15a852765110297 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 12 Dec 2023 15:40:21 +0100 Subject: [PATCH 05/11] attempt to speed up elide_mismatches_from_cigar by static typing --- pywfa/align.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index c03e2b6..b552db0 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -269,7 +269,8 @@ cpdef elide_mismatches_from_cigar(cigartuples): return [] modified = [] cdef int l - block = 0 + cdef int opp + cdef int block = 0 for opp, l in 
cigartuples: if opp != 8 and opp != 0: if block: From 54fd9f488050d0939f515170ee20a66653908643 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 19 Dec 2023 13:26:46 +0100 Subject: [PATCH 06/11] expose aligner settings as properties of the object and allow them to be modified --- pywfa/align.pxd | 9 +- pywfa/align.pyx | 303 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 256 insertions(+), 56 deletions(-) diff --git a/pywfa/align.pxd b/pywfa/align.pxd index 60763a5..2ec0433 100644 --- a/pywfa/align.pxd +++ b/pywfa/align.pxd @@ -4,11 +4,8 @@ from pywfa cimport WFA_wrap as wfa cdef class WavefrontAligner: - - cdef wfa.wavefront_aligner_attr_t* attributes cdef wfa.wavefront_aligner_t* wf_aligner - cdef str _pattern, _text + cdef str _pattern, _text, _wildcard cdef bytes _bpattern - cdef char _wildcard - cdef bint score_only - cdef public int match_score, alignment_score, text_len, pattern_len + cdef char _bwildcard + cdef public int alignment_score, text_len, pattern_len diff --git a/pywfa/align.pyx b/pywfa/align.pyx index b552db0..11a97e6 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -318,7 +318,6 @@ cdef class WavefrontAligner: distance="affine", int match=0, int mismatch=4, - int indel=2, int gap_opening=6, int gap_extension=2, int gap_opening2=24, @@ -338,7 +337,6 @@ cdef class WavefrontAligner: int xdrop=20, wildcard=None ): - self.pattern_len = 0 self.text_len = 0 if pattern: @@ -348,73 +346,63 @@ cdef class WavefrontAligner: # could get a malloc version working # self.attributes = malloc(sizeof(wfa.wavefront_aligner_attr_default)) - self.attributes = &wfa.wavefront_aligner_attr_default - if wildcard is not None: - if not isinstance(wildcard, str): - raise TypeError(f"expected wildcard to be a string, but it is {type(wildcard)}") - if len(wildcard) > 1: - raise ValueError(f"wildcard must have length 1, but has length {len(wildcard)}") - self._wildcard = wildcard.upper().encode("ascii")[0] + cdef wfa.wavefront_aligner_attr_t attributes 
= wfa.wavefront_aligner_attr_default + self.wildcard = wildcard if distance == "linear": - self.attributes.distance_metric = wfa.gap_linear - self.attributes.linear_penalties.match = match - self.match_score = match - self.attributes.linear_penalties.mismatch = mismatch - self.attributes.linear_penalties.indel = indel + attributes.distance_metric = wfa.gap_linear + attributes.linear_penalties.match = match + attributes.linear_penalties.mismatch = mismatch + attributes.linear_penalties.indel = gap_extension elif distance == "affine": - self.attributes.distance_metric = wfa.gap_affine - self.attributes.affine_penalties.match = match - self.match_score = match - self.attributes.affine_penalties.mismatch = mismatch - self.attributes.affine_penalties.gap_opening = gap_opening - self.attributes.affine_penalties.gap_extension = gap_extension + attributes.distance_metric = wfa.gap_affine + attributes.affine_penalties.match = match + attributes.affine_penalties.mismatch = mismatch + attributes.affine_penalties.gap_opening = gap_opening + attributes.affine_penalties.gap_extension = gap_extension elif distance == "affine2p": - self.attributes.distance_metric = wfa.gap_affine_2p - self.attributes.affine2p_penalties.match = match - self.match_score = match - self.attributes.affine2p_penalties.mismatch = mismatch - self.attributes.affine2p_penalties.gap_opening1 = gap_opening - self.attributes.affine2p_penalties.gap_extension1 = gap_extension - self.attributes.affine2p_penalties.gap_opening2 = gap_opening2 - self.attributes.affine2p_penalties.gap_extension2 = gap_extension2 + attributes.distance_metric = wfa.gap_affine_2p + attributes.affine2p_penalties.match = match + attributes.affine2p_penalties.mismatch = mismatch + attributes.affine2p_penalties.gap_opening1 = gap_opening + attributes.affine2p_penalties.gap_extension1 = gap_extension + attributes.affine2p_penalties.gap_opening2 = gap_opening2 + attributes.affine2p_penalties.gap_extension2 = gap_extension2 else: raise 
NotImplementedError(f'{distance} distance not implemented') if scope == "full": - self.attributes.alignment_scope = wfa.compute_alignment + attributes.alignment_scope = wfa.compute_alignment elif scope == "score": - self.attributes.alignment_scope = wfa.compute_score - self.score_only = True + attributes.alignment_scope = wfa.compute_score else: raise ValueError(f'{scope} scope not understood') - self.attributes.alignment_form.pattern_begin_free = pattern_begin_free - self.attributes.alignment_form.pattern_end_free = pattern_end_free - self.attributes.alignment_form.text_begin_free = text_begin_free - self.attributes.alignment_form.text_end_free = text_end_free + attributes.alignment_form.pattern_begin_free = pattern_begin_free + attributes.alignment_form.pattern_end_free = pattern_end_free + attributes.alignment_form.text_begin_free = text_begin_free + attributes.alignment_form.text_end_free = text_end_free if span == "ends-free": - self.attributes.alignment_form.span = wfa.alignment_endsfree - + attributes.alignment_form.span = wfa.alignment_endsfree elif span == "end-to-end": - self.attributes.alignment_form.span = wfa.alignment_end2end + attributes.alignment_form.span = wfa.alignment_end2end else: raise NotImplementedError(f'{span} span not implemented') if heuristic is None: - self.attributes.heuristic.strategy = wfa.wf_heuristic_none + attributes.heuristic.strategy = wfa.wf_heuristic_none elif heuristic == "adaptive": - self.attributes.heuristic.strategy = wfa.wf_heuristic_wfadaptive - self.attributes.heuristic.min_wavefront_length = min_wavefront_length - self.attributes.heuristic.max_distance_threshold = max_distance_threshold - self.attributes.heuristic.steps_between_cutoffs = steps_between_cutoffs + attributes.heuristic.strategy = wfa.wf_heuristic_wfadaptive + attributes.heuristic.min_wavefront_length = min_wavefront_length + attributes.heuristic.max_distance_threshold = max_distance_threshold + attributes.heuristic.steps_between_cutoffs = 
steps_between_cutoffs elif heuristic == "X-drop": - self.attributes.heuristic.strategy = wfa.wf_heuristic_xdrop - self.attributes.heuristic.xdrop = xdrop - self.attributes.heuristic.steps_between_cutoffs = steps_between_cutoffs + attributes.heuristic.strategy = wfa.wf_heuristic_xdrop + attributes.heuristic.xdrop = xdrop + attributes.heuristic.steps_between_cutoffs = steps_between_cutoffs else: raise NotImplementedError(f'{heuristic} heuristic not implemented') - self.wf_aligner = wfa.wavefront_aligner_new(self.attributes) + self.wf_aligner = wfa.wavefront_aligner_new(&attributes) def wavefront_align(self, text, pattern=None): """Perform wavefront alignment. @@ -436,7 +424,7 @@ cdef class WavefrontAligner: if not self._wildcard: wfa.wavefront_align(self.wf_aligner, self._bpattern, len(self._bpattern), t, len(text)) else: - args = wildcard_fun_args(self._bpattern, t, self._wildcard) + args = wildcard_fun_args(self._bpattern, t, self._bwildcard) wfa.wavefront_align_lambda(self.wf_aligner, wildcard_match_fun, &args, len(self._bpattern), len(text)) return self.wf_aligner.cigar.score @@ -464,6 +452,221 @@ cdef class WavefrontAligner: def score(self): return self.wf_aligner.cigar.score + @property + def pattern_begin_free(self): + return self.wf_aligner.alignment_form.pattern_begin_free + + @pattern_begin_free.setter + def pattern_begin_free(self, int pattern_begin_free): + self.wf_aligner.alignment_form.pattern_begin_free = pattern_begin_free + + @property + def pattern_end_free(self): + return self.wf_aligner.alignment_form.pattern_end_free + + @pattern_end_free.setter + def pattern_end_free(self, int pattern_end_free): + self.wf_aligner.alignment_form.pattern_end_free = pattern_end_free + + @property + def text_begin_free(self): + return self.wf_aligner.alignment_form.text_begin_free + + @text_begin_free.setter + def text_begin_free(self, int text_begin_free): + self.wf_aligner.alignment_form.text_begin_free = text_begin_free + + @property + def text_end_free(self): + 
return self.wf_aligner.alignment_form.text_end_free + + @text_end_free.setter + def text_end_free(self, int text_end_free): + self.wf_aligner.alignment_form.text_end_free = text_end_free + + @property + def scope(self): + if self.wf_aligner.alignment_scope == wfa.compute_alignment: + return "full" + else: + return "score" + + @scope.setter + def scope(self, scope): + if scope == "full": + self.wf_aligner.alignment_scope = wfa.compute_alignment + elif scope == "score": + self.wf_aligner.alignment_scope = wfa.compute_score + else: + raise ValueError(f'{scope} scope not understood') + + @property + def span(self): + if self.wf_aligner.alignment_form.span == wfa.alignment_endsfree: + return "ends-free" + elif self.wf_aligner.alignment_form.span == wfa.alignment_end2end: + return "end-to-end" + + @span.setter + def span(self, span): + if span == "ends-free": + self.wf_aligner.alignment_form.span = wfa.alignment_endsfree + + elif span == "end-to-end": + self.wf_aligner.alignment_form.span = wfa.alignment_end2end + else: + raise NotImplementedError(f'{span} span not implemented') + + @property + def heuristic(self): + if self.wf_aligner.heuristic.strategy == wfa.wf_heuristic_none: + return None + elif self.wf_aligner.heuristic.strategy == wfa.wf_heuristic_wfadaptive: + return "adaptive" + elif self.wf_aligner.heuristic.strategy == wfa.wf_heuristic_xdrop: + return "X-drop" + + @heuristic.setter + def heuristic(self, heuristic): + if heuristic is None: + self.wf_aligner.heuristic.strategy = wfa.wf_heuristic_none + elif heuristic == "adaptive": + self.wf_aligner.heuristic.strategy = wfa.wf_heuristic_wfadaptive + elif heuristic == "X-drop": + self.wf_aligner.heuristic.strategy = wfa.wf_heuristic_xdrop + else: + raise NotImplementedError(f'{heuristic} heuristic not implemented') + + @property + def min_wavefront_length(self): + return self.wf_aligner.heuristic.min_wavefront_length + + @min_wavefront_length.setter + def min_wavefront_length(self, int length): + 
self.wf_aligner.heuristic.min_wavefront_length = length + + @property + def max_distance_threshold(self): + return self.wf_aligner.heuristic.max_distance_threshold + + @max_distance_threshold.setter + def max_distance_threshold(self, int thresh): + self.wf_aligner.heuristic.max_distance_threshold = thresh + + @property + def steps_between_cutoffs(self): + return self.wf_aligner.heuristic.steps_between_cutoffs + + @steps_between_cutoffs.setter + def steps_between_cutoffs(self, int steps): + self.wf_aligner.heuristic.steps_between_cutoffs = steps + + @property + def xdrop(self): + return self.wf_aligner.heuristic.xdrop + + @xdrop.setter + def xdrop(self, int xdrop): + self.wf_aligner.heuristic.xdrop = xdrop + + @property + def distance(self): + if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: + return "linear" + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: + return "affine" + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine_2p: + return "affine2p" + + @distance.setter + def distance(self, distance): + if distance == "linear": + self.wf_aligner.penalties.distance_metric = wfa.gap_linear + elif distance == "affine": + self.wf_aligner.penalties.distance_metric = wfa.gap_affine + elif distance == "affine2p": + self.wf_aligner.penalties.distance_metric = wfa.gap_affine_2p + else: + raise NotImplementedError(f'{distance} distance not implemented') + + def _edit_penalties(self): + if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: + wfa.wavefront_penalties_set_linear(&self.wf_aligner.penalties, &self.wf_aligner.penalties.linear_penalties) + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: + wfa.wavefront_penalties_set_affine(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine_penalties) + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine_2p: + wfa.wavefront_penalties_set_affine2p(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine2p_penalties) + + @property + 
def match_score(self): + return self.wf_aligner.penalties.match + + @match_score.setter + def match_score(self, int match): + self.wf_aligner.penalties.linear_penalties.match = self.wf_aligner.penalties.affine_penalties.match = self.wf_aligner.penalties.affine2p_penalties.match = match + self._edit_penalties() + + @property + def mismatch_penalty(self): + return self.wf_aligner.penalties.mismatch + + @mismatch_penalty.setter + def mismatch_penalty(self, int mismatch): + self.wf_aligner.penalties.linear_penalties.mismatch = self.wf_aligner.penalties.affine_penalties.mismatch = self.wf_aligner.penalties.affine2p_penalties.mismatch = mismatch + self._edit_penalties() + + @property + def gap_opening_penalty(self): + return self.wf_aligner.penalties.gap_opening1 + + @gap_opening_penalty.setter + def gap_opening_penalty(self, int penalty): + self.wf_aligner.penalties.linear_penalties.indel = self.wf_aligner.penalties.affine_penalties.gap_opening = self.wf_aligner.penalties.affine2p_penalties.gap_opening1 = penalty + self._edit_penalties() + + @property + def gap_extension_penalty(self): + return self.wf_aligner.penalties.gap_extension1 + + @gap_extension_penalty.setter + def gap_extension_penalty(self, int penalty): + self.wf_aligner.penalties.linear_penalties.indel = self.wf_aligner.penalties.affine_penalties.gap_extension = self.wf_aligner.penalties.affine2p_penalties.gap_extension1 = penalty + self._edit_penalties() + + @property + def gap_opening2_penalty(self): + return self.wf_aligner.penalties.gap_opening2 + + @gap_opening2_penalty.setter + def gap_opening2_penalty(self, int penalty): + self.wf_aligner.penalties.affine2p_penalties.gap_opening2 = penalty + self._edit_penalties() + + @property + def gap_extension2_penalty(self): + return self.wf_aligner.penalties.gap_extension2 + + @gap_extension2_penalty.setter + def gap_extension2_penalty(self, int penalty): + self.wf_aligner.penalties.affine2p_penalties.gap_extension2 = penalty + self._edit_penalties() + + 
@property + def wildcard(self): + return self._wildcard + + @wildcard.setter + def wildcard(self, wildcard): + if wildcard is not None: + if not isinstance(wildcard, str): + raise TypeError(f"expected wildcard to be a string, but it is {type(wildcard)}") + if len(wildcard) > 1: + raise ValueError(f"wildcard must have length 1, but has length {len(wildcard)}") + self._wildcard = wildcard + self._bwildcard = wildcard.upper().encode("ascii")[0] + else: + self._wildcard = None + @property def cigarstring(self): cdef wfa.cigar_t* cigar @@ -523,7 +726,7 @@ cdef class WavefrontAligner: @property def locations(self): - if self.score_only: + if self.scope == "score": return [0, 0, 0, 0] cigartuples = self.cigartuples if not cigartuples or self.text_len == 0 or self.pattern_len == 0: @@ -607,7 +810,7 @@ cdef class WavefrontAligner: res = AlignmentResult(lp, len(text), locs[0], locs[1], locs[2], locs[3], ct, score, "", "", status) else: res = AlignmentResult(lp, len(text), locs[0], locs[1], locs[2], locs[3], ct, score, p, text, status) - if not self.score_only: + if not self.scope == "full": if clip_cigar: res = clip_cigartuples(res, min_aligned_bases_left, min_aligned_bases_right) if elide_mismatches: From 435826f836430db55d7b9ff3c96b9869e5fbf165 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 19 Dec 2023 16:42:06 +0100 Subject: [PATCH 07/11] expose max_alignment_steps to Python --- pywfa/align.pyx | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 11a97e6..87f78e2 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -4,6 +4,7 @@ from __future__ import division, print_function, absolute_import from pywfa cimport WFA_wrap as wfa from dataclasses import dataclass from libc.stdio cimport stdout, FILE +from libc.limits cimport INT_MAX __all__ = ["WavefrontAligner", "clip_cigartuples", "cigartuples_to_str", "elide_mismatches_from_cigar"] @@ -335,7 +336,8 @@ cdef class WavefrontAligner: int 
max_distance_threshold=50, int steps_between_cutoffs=1, int xdrop=20, - wildcard=None + wildcard=None, + int max_steps=0 ): self.pattern_len = 0 self.text_len = 0 @@ -402,6 +404,10 @@ cdef class WavefrontAligner: else: raise NotImplementedError(f'{heuristic} heuristic not implemented') + if max_steps <= 0: + max_steps = INT_MAX + attributes.system.max_alignment_steps = max_steps + self.wf_aligner = wfa.wavefront_aligner_new(&attributes) def wavefront_align(self, text, pattern=None): @@ -667,6 +673,16 @@ cdef class WavefrontAligner: else: self._wildcard = None + @property + def max_steps(self): + return self.wf_aligner.system.max_alignment_steps + + @max_steps.setter + def max_steps(self, int steps): + if steps <= 0: + steps = INT_MAX + wfa.wavefront_aligner_set_max_alignment_steps(self.wf_aligner, steps) + @property def cigarstring(self): cdef wfa.cigar_t* cigar From 35d46a245a9ee973b86e48c95b3a906821d92ee1 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 19 Dec 2023 17:00:32 +0100 Subject: [PATCH 08/11] remove unnecessary manual libc wrapping --- pywfa/align.pyx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 87f78e2..1d7c7d7 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -3,18 +3,12 @@ from __future__ import division, print_function, absolute_import from pywfa cimport WFA_wrap as wfa from dataclasses import dataclass -from libc.stdio cimport stdout, FILE +from libc.stdio cimport stdout, FILE, fopen, fclose, fputs from libc.limits cimport INT_MAX __all__ = ["WavefrontAligner", "clip_cigartuples", "cigartuples_to_str", "elide_mismatches_from_cigar"] -cdef extern from "stdio.h": - FILE *fopen(const char *, const char *) - int fclose(FILE *) - int fputs(const char *, FILE *) - - cdef int[89] codes codes[:] = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,9,0,2,0,0,0,5,1, From
5b5e9c02b2b7dd333fbe6375dd85a95ab51600ec Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 19 Dec 2023 17:03:55 +0100 Subject: [PATCH 09/11] remove unnecessary python2-era __future__ imports --- pywfa/align.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 1d7c7d7..2e95634 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -1,6 +1,5 @@ #cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False -from __future__ import division, print_function, absolute_import from pywfa cimport WFA_wrap as wfa from dataclasses import dataclass from libc.stdio cimport stdout, FILE, fopen, fclose, fputs From 7a4fdff5cdf702457608f5294b8dc7d5b516a49f Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 19 Dec 2023 17:27:30 +0100 Subject: [PATCH 10/11] fix changing the distance metric --- pywfa/align.pyx | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index 2e95634..dc9e559 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -568,6 +568,14 @@ cdef class WavefrontAligner: def xdrop(self, int xdrop): self.wf_aligner.heuristic.xdrop = xdrop + def _edit_penalties(self): + if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: + wfa.wavefront_penalties_set_linear(&self.wf_aligner.penalties, &self.wf_aligner.penalties.linear_penalties) + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: + wfa.wavefront_penalties_set_affine(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine_penalties) + elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine_2p: + wfa.wavefront_penalties_set_affine2p(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine2p_penalties) + @property def distance(self): if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: @@ -587,14 +595,7 @@ cdef class WavefrontAligner: self.wf_aligner.penalties.distance_metric = wfa.gap_affine_2p else: raise NotImplementedError(f'{distance} 
distance not implemented') - - def _edit_penalties(self): - if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: - wfa.wavefront_penalties_set_linear(&self.wf_aligner.penalties, &self.wf_aligner.penalties.linear_penalties) - elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: - wfa.wavefront_penalties_set_affine(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine_penalties) - elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine_2p: - wfa.wavefront_penalties_set_affine2p(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine2p_penalties) + self._edit_penalties() @property def match_score(self): From 02cb9a8229c413e39c5c2e4900ec41a882b5f96c Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Thu, 21 Dec 2023 12:02:23 +0100 Subject: [PATCH 11/11] expose indel and levenshtein distance metrics to Python --- pywfa/align.pyx | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pywfa/align.pyx b/pywfa/align.pyx index dc9e559..d002698 100644 --- a/pywfa/align.pyx +++ b/pywfa/align.pyx @@ -344,7 +344,11 @@ cdef class WavefrontAligner: cdef wfa.wavefront_aligner_attr_t attributes = wfa.wavefront_aligner_attr_default self.wildcard = wildcard - if distance == "linear": + if distance == "indel": + attributes.distance_metric = wfa.indel + elif distance == "levenshtein": + attributes.distance_metric = wfa.edit + elif distance == "linear": attributes.distance_metric = wfa.gap_linear attributes.linear_penalties.match = match attributes.linear_penalties.mismatch = mismatch @@ -569,7 +573,11 @@ cdef class WavefrontAligner: self.wf_aligner.heuristic.xdrop = xdrop def _edit_penalties(self): - if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: + if self.wf_aligner.penalties.distance_metric == wfa.indel: + wfa.wavefront_penalties_set_indel(&self.wf_aligner.penalties) + elif self.wf_aligner.penalties.distance_metric == wfa.edit: + wfa.wavefront_penalties_set_edit(&self.wf_aligner.penalties) 
+ elif self.wf_aligner.penalties.distance_metric == wfa.gap_linear: wfa.wavefront_penalties_set_linear(&self.wf_aligner.penalties, &self.wf_aligner.penalties.linear_penalties) elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: wfa.wavefront_penalties_set_affine(&self.wf_aligner.penalties, &self.wf_aligner.penalties.affine_penalties) @@ -578,7 +586,11 @@ cdef class WavefrontAligner: @property def distance(self): - if self.wf_aligner.penalties.distance_metric == wfa.gap_linear: + if self.wf_aligner.penalties.distance_metric == wfa.indel: + return "indel" + elif self.wf_aligner.penalties.distance_metric == wfa.edit: + return "levenshtein" + elif self.wf_aligner.penalties.distance_metric == wfa.gap_linear: return "linear" elif self.wf_aligner.penalties.distance_metric == wfa.gap_affine: return "affine" @@ -587,7 +599,11 @@ cdef class WavefrontAligner: @distance.setter def distance(self, distance): - if distance == "linear": + if distance == "indel": + self.wf_aligner.penalties.distance_metric = wfa.indel + elif distance == "levenshtein": + self.wf_aligner.penalties.distance_metric = wfa.edit + elif distance == "linear": self.wf_aligner.penalties.distance_metric = wfa.gap_linear elif distance == "affine": self.wf_aligner.penalties.distance_metric = wfa.gap_affine