LCOV - code coverage report
Current view: top level - core/search - SPSearchDistanceEdLib.cc (source / functions) Hit Total Coverage
Test: coverage.info Lines: 527 574 91.8 %
Date: 2024-05-12 00:16:13 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /**
       2             : Copyright (c) 2014 Martin Šošić
       3             : Copyright (c) 2017-2022 Roman Katuntsev <sbkarr@stappler.org>
       4             : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
       5             : 
       6             : Permission is hereby granted, free of charge, to any person obtaining a copy
       7             : of this software and associated documentation files (the "Software"), to deal
       8             : in the Software without restriction, including without limitation the rights
       9             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      10             : copies of the Software, and to permit persons to whom the Software is
      11             : furnished to do so, subject to the following conditions:
      12             : 
      13             : The above copyright notice and this permission notice shall be included in
      14             : all copies or substantial portions of the Software.
      15             : 
      16             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      17             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      18             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      19             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      20             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      21             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      22             : THE SOFTWARE.
      23             : **/
      24             : 
      25             : #include "SPCommon.h"
      26             : #include "SPSearchDistance.h"
      27             : 
      28             : namespace STAPPLER_VERSIONIZED stappler::search {
      29             : 
      30             : // from https://github.com/Martinsos/edlib
      31             : 
      32             : using bytes = memory::bytes;
      33             : 
      34             : typedef uint64_t Word;
      35             : static const int WORD_SIZE = sizeof(Word) * 8; // Size of Word in bits
      36             : static const Word WORD_1 = static_cast<Word>(1);
      37             : static const Word HIGH_BIT_MASK = WORD_1 << (WORD_SIZE - 1);  // 100..00
      38             : static const int MAX_UCHAR = 255;
      39             : 
      40             : // Status codes
      41             : #define EDLIB_STATUS_OK 0
      42             : #define EDLIB_STATUS_ERROR 1
      43             : 
      44             : // Data needed to find alignment.
      45             : struct AlignmentData {
      46             :     Word* Ps;
      47             :     Word* Ms;
      48             :     int* scores;
      49             :     int* firstBlocks;
      50             :     int* lastBlocks;
      51             : 
      52        5325 :     AlignmentData(int maxNumBlocks, int targetLength) {
      53             :         // We build a complete table and mark first and last block for each column
      54             :         // (because algorithm is banded so only part of each columns is used).
      55             :         // TODO: do not build a whole table, but just enough blocks for each column.
      56        5325 :          Ps     = new Word[maxNumBlocks * targetLength];
      57        5325 :          Ms     = new Word[maxNumBlocks * targetLength];
      58        5325 :          scores = new  int[maxNumBlocks * targetLength];
      59        5325 :          firstBlocks = new int[targetLength];
      60        5325 :          lastBlocks  = new int[targetLength];
      61        5325 :     }
      62             : 
      63        5325 :     ~AlignmentData() {
      64        5325 :         delete[] Ps;
      65        5325 :         delete[] Ms;
      66        5325 :         delete[] scores;
      67        5325 :         delete[] firstBlocks;
      68        5325 :         delete[] lastBlocks;
      69        5325 :     }
      70             : };
      71             : 
      72             : struct Block {
      73             :     Word P;  // Pvin
      74             :     Word M;  // Mvin
      75             :     int score; // score of last cell in block;
      76             : 
      77      688125 :     Block() {}
      78       11650 :     Block(Word p, Word m, int s) :P(p), M(m), score(s) {}
      79             : };
      80             : /**
      81             :  * Alignment methods - how should Edlib treat gaps before and after query?
      82             :  */
      83             : typedef enum {
      84             :         EDLIB_MODE_NW,
      85             : } EdlibAlignMode;
      86             : 
      87             : /**
      88             :  * Alignment tasks - what do you want Edlib to do?
      89             :  */
      90             : typedef enum {
      91             :         EDLIB_TASK_DISTANCE,  //!< Find edit distance and end locations.
      92             :         EDLIB_TASK_LOC,       //!< Find edit distance, end locations and start locations.
      93             :         EDLIB_TASK_PATH       //!< Find edit distance, end locations and start locations and alignment path.
      94             : } EdlibAlignTask;
      95             : 
      96             : /**
      97             :  * Describes cigar format.
      98             :  * @see http://samtools.github.io/hts-specs/SAMv1.pdf
      99             :  * @see http://drive5.com/usearch/manual/cigar.html
     100             :  */
     101             : typedef enum {
     102             :         EDLIB_CIGAR_STANDARD,  //!< Match: 'M', Insertion: 'I', Deletion: 'D', Mismatch: 'M'.
     103             :         EDLIB_CIGAR_EXTENDED   //!< Match: '=', Insertion: 'I', Deletion: 'D', Mismatch: 'X'.
     104             : } EdlibCigarFormat;
     105             : 
     106             : // Edit operations.
     107             : #define EDLIB_EDOP_MATCH 0    //!< Match.
     108             : #define EDLIB_EDOP_INSERT 1   //!< Insertion to target = deletion from query.
     109             : #define EDLIB_EDOP_DELETE 2   //!< Deletion from target = insertion to query.
     110             : #define EDLIB_EDOP_MISMATCH 3 //!< Mismatch.
     111             : 
     112             : /**
     113             :  * Container for results of alignment done by edlibAlign() function.
     114             :  */
     115             : typedef struct {
     116             :         /**
     117             :          * EDLIB_STATUS_OK or EDLIB_STATUS_ERROR. If error, all other fields will have undefined values.
     118             :          */
     119             :         int status;
     120             : 
     121             :         /**
     122             :          * -1 if k is non-negative and edit distance is larger than k.
     123             :          */
     124             :         int editDistance;
     125             : 
     126             :         /**
     127             :          * Array of zero-based positions in target where optimal alignment paths end.
     128             :          * If gap after query is penalized, gap counts as part of query (NW), otherwise not.
     129             :          * Set to NULL if edit distance is larger than k.
     130             :          * If you do not free whole result object using edlibFreeAlignResult(), do not forget to use free().
     131             :          */
     132             :         int *endLocations;
     133             : 
     134             :         /**
     135             :          * Array of zero-based positions in target where optimal alignment paths start,
     136             :          * they correspond to endLocations.
     137             :          * If gap before query is penalized, gap counts as part of query (NW), otherwise not.
     138             :          * Set to NULL if not calculated or if edit distance is larger than k.
     139             :          * If you do not free whole result object using edlibFreeAlignResult(), do not forget to use free().
     140             :          */
     141             :         int *startLocations;
     142             : 
     143             :         /**
     144             :          * Number of end (and start) locations.
     145             :          */
     146             :         int numLocations;
     147             : 
     148             :         /**
     149             :          * Alignment is found for first pair of start and end locations.
     150             :          * Set to NULL if not calculated.
     151             :          * Alignment is sequence of numbers: 0, 1, 2, 3.
     152             :          * 0 stands for match.
     153             :          * 1 stands for insertion to target.
     154             :          * 2 stands for insertion to query.
     155             :          * 3 stands for mismatch.
     156             :          * Alignment aligns query to target from begining of query till end of query.
     157             :          * If gaps are not penalized, they are not in alignment.
     158             :          * If you do not free whole result object using edlibFreeAlignResult(), do not forget to use free().
     159             :          */
     160             :         unsigned char *alignment;
     161             : 
     162             :         /**
     163             :          * Length of alignment.
     164             :          */
     165             :         int alignmentLength;
     166             : 
     167             :         /**
     168             :          * Number of different characters in query and target together.
     169             :          */
     170             :         int alphabetLength;
     171             : } EdlibAlignResult;
     172             : 
     173             : typedef struct {
     174             :         char first;
     175             :         char second;
     176             : } EdlibEqualityPair;
     177             : 
     178             : /**
     179             :  * @brief Configuration object for edlibAlign() function.
     180             :  */
     181             : typedef struct {
     182             :         /**
     183             :          * Set k to non-negative value to tell edlib that edit distance is not larger than k.
     184             :          * Smaller k can significantly improve speed of computation.
     185             :          * If edit distance is larger than k, edlib will set edit distance to -1.
     186             :          * Set k to negative value and edlib will internally auto-adjust k until score is found.
     187             :          */
     188             :         int k;
     189             : 
     190             :         /**
     191             :          * Alignment method.
     192             :          * EDLIB_MODE_NW: global (Needleman-Wunsch)
     193             :          * EDLIB_MODE_SHW: prefix. Gap after query is not penalized.
     194             :          * EDLIB_MODE_HW: infix. Gaps before and after query are not penalized.
     195             :          */
     196             :         EdlibAlignMode mode;
     197             : 
     198             :         /**
     199             :          * Alignment task - tells Edlib what to calculate. Less to calculate, faster it is.
     200             :          * EDLIB_TASK_DISTANCE - find edit distance and end locations of optimal alignment paths in target.
     201             :          * EDLIB_TASK_LOC - find edit distance and start and end locations of optimal alignment paths in target.
     202             :          * EDLIB_TASK_PATH - find edit distance, alignment path (and start and end locations of it in target).
     203             :          */
     204             :         EdlibAlignTask task;
     205             : } EdlibAlignConfig;
     206             : 
     207             : /**
     208             :  * Defines equality relation on alphabet characters.
     209             :  * By default each character is always equal only to itself, but you can also provide additional equalities.
     210             :  */
     211             : class EqualityDefinition {
     212             : private:
     213             :     bool matrix[MAX_UCHAR + 1][MAX_UCHAR + 1];
     214             : public:
     215         450 :     EqualityDefinition(const std::string& alphabet) {
     216        7600 :         for (int i = 0; i < static_cast<int>(alphabet.size()); i++) {
     217      235250 :             for (int j = 0; j < static_cast<int>(alphabet.size()); j++) {
     218      228100 :                 matrix[i][j] = (i == j);
     219             :             }
     220             :         }
     221         450 :     }
     222             : 
     223             :     /**
     224             :      * @param a  Element from transformed sequence.
     225             :      * @param b  Element from transformed sequence.
     226             :      * @return True if a and b are defined as equal, false otherwise.
     227             :      */
     228   755832350 :     bool areEqual(unsigned char a, unsigned char b) const {
     229   755832350 :         return matrix[a][b];
     230             :     }
     231             : };
     232             : 
     233             : static int myersCalcEditDistanceNW(const Word* Peq, int W, int maxNumBlocks,
     234             :                                    int queryLength,
     235             :                                    const unsigned char* target, int targetLength,
     236             :                                    int k, int* bestScore_,
     237             :                                    int* position_, bool findAlignment,
     238             :                                    AlignmentData** alignData, int targetStopPosition);
     239             : 
     240             : 
     241             : static int obtainAlignment(
     242             :         const unsigned char* query, const unsigned char* rQuery, int queryLength,
     243             :         const unsigned char* target, const unsigned char* rTarget, int targetLength,
     244             :         const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore,
     245             :         unsigned char** alignment, int* alignmentLength);
     246             : 
     247             : static int obtainAlignmentHirschberg(
     248             :         const unsigned char* query, const unsigned char* rQuery, int queryLength,
     249             :         const unsigned char* target, const unsigned char* rTarget, int targetLength,
     250             :         const EqualityDefinition& equalityDefinition, int alphabetLength, int bestScore,
     251             :         unsigned char** alignment, int* alignmentLength);
     252             : 
     253             : static int obtainAlignmentTraceback(int queryLength, int targetLength,
     254             :                                     int bestScore, const AlignmentData* alignData,
     255             :                                     unsigned char** alignment, int* alignmentLength);
     256             : 
     257             : static std::string transformSequences(const char* queryOriginal, int queryLength,
     258             :                                  const char* targetOriginal, int targetLength,
     259             :                                  unsigned char** queryTransformed,
     260             :                                  unsigned char** targetTransformed);
     261             : 
     262             : static inline int ceilDiv(int x, int y);
     263             : 
     264             : static inline unsigned char* createReverseCopy(const unsigned char* seq, int length);
     265             : 
     266             : static inline Word* buildPeq(const int alphabetLength,
     267             :                              const unsigned char* query,
     268             :                              const int queryLength,
     269             :                              const EqualityDefinition& equalityDefinition);
     270             : 
     271             : 
     272             : /**
     273             :  * Main edlib method.
     274             :  */
     275         500 : static EdlibAlignResult edlibAlign(const char* const queryOriginal, const int queryLength,
     276             :                                        const char* const targetOriginal, const int targetLength,
     277             :                                        const EdlibAlignConfig config) {
     278             :     EdlibAlignResult result;
     279         500 :     result.status = EDLIB_STATUS_OK;
     280         500 :     result.editDistance = -1;
     281         500 :     result.endLocations = result.startLocations = NULL;
     282         500 :     result.numLocations = 0;
     283         500 :     result.alignment = NULL;
     284         500 :     result.alignmentLength = 0;
     285         500 :     result.alphabetLength = 0;
     286             : 
     287             :     /*------------ TRANSFORM SEQUENCES AND RECOGNIZE ALPHABET -----------*/
     288             :     unsigned char* query, * target;
     289             :     std::string alphabet = transformSequences(queryOriginal, queryLength, targetOriginal, targetLength,
     290         500 :                                          &query, &target);
     291         500 :     result.alphabetLength = static_cast<int>(alphabet.size());
     292             :     /*-------------------------------------------------------*/
     293             : 
     294             :     // Handle special situation when at least one of the sequences has length 0.
     295         500 :     if (queryLength == 0 || targetLength == 0) {
     296          50 :                 result.editDistance = std::max(queryLength, targetLength);
     297          50 :                 result.endLocations = static_cast<int *>(malloc(sizeof(int) * 1));
     298          50 :                 result.endLocations[0] = targetLength - 1;
     299          50 :                 result.numLocations = 1;
     300             : 
     301          50 :         free(query);
     302          50 :         free(target);
     303          50 :         return result;
     304             :     }
     305             : 
     306             :     /*--------------------- INITIALIZATION ------------------*/
     307         450 :     int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // bmax in Myers
     308         450 :     int W = maxNumBlocks * WORD_SIZE - queryLength; // number of redundant cells in last level blocks
     309         450 :     EqualityDefinition equalityDefinition(alphabet);
     310         450 :     Word* Peq = buildPeq(static_cast<int>(alphabet.size()), query, queryLength, equalityDefinition);
     311             :     /*-------------------------------------------------------*/
     312             : 
     313             :     /*------------------ MAIN CALCULATION -------------------*/
     314             :     // TODO: Store alignment data only after k is determined? That could make things faster.
     315             :     int positionNW; // Used only when mode is NW.
     316         450 :     AlignmentData* alignData = NULL;
     317         450 :     bool dynamicK = false;
     318         450 :     int k = config.k;
     319         450 :     if (k < 0) { // If valid k is not given, auto-adjust k until solution is found.
     320         450 :         dynamicK = true;
     321         450 :         k = WORD_SIZE; // Gives better results than smaller k.
     322             :     }
     323             : 
     324             :     do {
     325         900 :                 myersCalcEditDistanceNW(Peq, W, maxNumBlocks, queryLength, target, targetLength,
     326             :                                 k, &(result.editDistance), &positionNW, false, &alignData, -1);
     327         900 :                 k *= 2;
     328         900 :     } while(dynamicK && result.editDistance == -1);
     329             : 
     330         450 :     if (result.editDistance >= 0) {  // If there is solution.
     331             :         // If NW mode, set end location explicitly.
     332         450 :         if (config.mode == EDLIB_MODE_NW) {
     333         450 :             result.endLocations = static_cast<int *>(malloc(sizeof(int) * 1));
     334         450 :             result.endLocations[0] = targetLength - 1;
     335         450 :             result.numLocations = 1;
     336             :         }
     337             : 
     338             :         // Find starting locations.
     339         450 :         if (config.task == EDLIB_TASK_LOC || config.task == EDLIB_TASK_PATH) {
     340         450 :             result.startLocations = static_cast<int *>(malloc(result.numLocations * sizeof(int)));
     341         900 :                         for (int i = 0; i < result.numLocations; i++) {
     342         450 :                                 result.startLocations[i] = 0;
     343             :                         }
     344             :         }
     345             : 
     346             :         // Find alignment -> all comes down to finding alignment for NW.
     347             :         // Currently we return alignment only for first pair of locations.
     348         450 :         if (config.task == EDLIB_TASK_PATH) {
     349         450 :             int alnStartLocation = result.startLocations[0];
     350         450 :             int alnEndLocation = result.endLocations[0];
     351         450 :             const unsigned char* alnTarget = target + alnStartLocation;
     352         450 :             const int alnTargetLength = alnEndLocation - alnStartLocation + 1;
     353         450 :             const unsigned char* rAlnTarget = createReverseCopy(alnTarget, alnTargetLength);
     354         450 :             const unsigned char* rQuery  = createReverseCopy(query, queryLength);
     355         450 :             obtainAlignment(query, rQuery, queryLength,
     356             :                             alnTarget, rAlnTarget, alnTargetLength,
     357         450 :                             equalityDefinition, static_cast<int>(alphabet.size()), result.editDistance,
     358             :                             &(result.alignment), &(result.alignmentLength));
     359         450 :             delete[] rAlnTarget;
     360         450 :             delete[] rQuery;
     361             :         }
     362             :     }
     363             :     /*-------------------------------------------------------*/
     364             : 
     365             :     //--- Free memory ---//
     366         450 :     delete[] Peq;
     367         450 :     free(query);
     368         450 :     free(target);
     369         450 :     if (alignData) delete alignData;
     370             :     //-------------------//
     371             : 
     372         450 :     return result;
     373         500 : }
     374             : 
     375             : /**
     376             :  * Build Peq table for given query and alphabet.
     377             :  * Peq is table of dimensions alphabetLength+1 x maxNumBlocks.
     378             :  * Bit i of Peq[s * maxNumBlocks + b] is 1 if i-th symbol from block b of query equals symbol s, otherwise it is 0.
     379             :  * NOTICE: free returned array with delete[]!
     380             :  */
     381        5775 : static inline Word* buildPeq(const int alphabetLength,
     382             :                              const unsigned char* const query,
     383             :                              const int queryLength,
     384             :                              const EqualityDefinition& equalityDefinition) {
     385        5775 :     int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
     386             :     // table of dimensions alphabetLength+1 x maxNumBlocks. Last symbol is wildcard.
     387        5775 :     Word* Peq = new Word[(alphabetLength + 1) * maxNumBlocks];
     388             : 
     389             :     // Build Peq (1 is match, 0 is mismatch). NOTE: last column is wildcard(symbol that matches anything) with just 1s
     390      134300 :     for (int symbol = 0; symbol <= alphabetLength; symbol++) {
     391    12530450 :         for (int b = 0; b < maxNumBlocks; b++) {
     392    12401925 :             if (symbol < alphabetLength) {
     393    11866050 :                 Peq[symbol * maxNumBlocks + b] = 0;
     394   771293250 :                 for (int r = (b+1) * WORD_SIZE - 1; r >= b * WORD_SIZE; r--) {
     395   759427200 :                     Peq[symbol * maxNumBlocks + b] <<= 1;
     396             :                     // NOTE: We pretend like query is padded at the end with W wildcard symbols
     397   759427200 :                     if (r >= queryLength || equalityDefinition.areEqual(query[r], symbol))
     398    37712300 :                         Peq[symbol * maxNumBlocks + b] += 1;
     399             :                 }
     400             :             } else { // Last symbol is wildcard, so it is all 1s
     401      535875 :                 Peq[symbol * maxNumBlocks + b] = static_cast<Word>(-1);
     402             :             }
     403             :         }
     404             :     }
     405             : 
     406        5775 :     return Peq;
     407             : }
     408             : 
     409             : 
     410             : /**
     411             :  * Returns new sequence that is reverse of given sequence.
     412             :  * Free returned array with delete[].
     413             :  */
     414         900 : static inline unsigned char* createReverseCopy(const unsigned char* const seq, const int length) {
     415         900 :     unsigned char* rSeq = new unsigned char[length];
     416     5144400 :     for (int i = 0; i < length; i++) {
     417     5143500 :         rSeq[i] = seq[length - i - 1];
     418             :     }
     419         900 :     return rSeq;
     420             : }
     421             : 
     422             : /**
     423             :  * Corresponds to Advance_Block function from Myers.
     424             :  * Calculates one word(block), which is part of a column.
     425             :  * Highest bit of word (one most to the left) is most bottom cell of block from column.
     426             :  * Pv[i] and Mv[i] define vin of cell[i]: vin = cell[i] - cell[i-1].
     427             :  * @param [in] Pv  Bitset, Pv[i] == 1 if vin is +1, otherwise Pv[i] == 0.
     428             :  * @param [in] Mv  Bitset, Mv[i] == 1 if vin is -1, otherwise Mv[i] == 0.
     429             :  * @param [in] Eq  Bitset, Eq[i] == 1 if match, 0 if mismatch.
     430             :  * @param [in] hin  Will be +1, 0 or -1.
     431             :  * @param [out] PvOut  Bitset, PvOut[i] == 1 if vout is +1, otherwise PvOut[i] == 0.
     432             :  * @param [out] MvOut  Bitset, MvOut[i] == 1 if vout is -1, otherwise MvOut[i] == 0.
     433             :  * @param [out] hout  Will be +1, 0 or -1.
     434             :  */
     435   356436175 : static inline int calculateBlock(Word Pv, Word Mv, Word Eq, const int hin,
     436             :                                  Word &PvOut, Word &MvOut) {
     437             :     // hin can be 1, -1 or 0.
     438             :     // 1  -> 00...01
     439             :     // 0  -> 00...00
     440             :     // -1 -> 11...11 (2-complement)
     441             : 
     442   356436175 :     Word hinIsNeg = static_cast<Word>(hin >> 2) & WORD_1; // 00...001 if hin is -1, 00...000 if 0 or 1
     443             : 
     444   356436175 :     Word Xv = Eq | Mv;
     445             :     // This is instruction below written using 'if': if (hin < 0) Eq |= (Word)1;
     446   356436175 :     Eq |= hinIsNeg;
     447   356436175 :     Word Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
     448             : 
     449   356436175 :     Word Ph = Mv | ~(Xh | Pv);
     450   356436175 :     Word Mh = Pv & Xh;
     451             : 
     452   356436175 :     int hout = 0;
     453             :     // This is instruction below written using 'if': if (Ph & HIGH_BIT_MASK) hout = 1;
     454   356436175 :     hout = (Ph & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
     455             :     // This is instruction below written using 'if': if (Mh & HIGH_BIT_MASK) hout = -1;
     456   356436175 :     hout -= (Mh & HIGH_BIT_MASK) >> (WORD_SIZE - 1);
     457             : 
     458   356436175 :     Ph <<= 1;
     459   356436175 :     Mh <<= 1;
     460             : 
     461             :     // This is instruction below written using 'if': if (hin < 0) Mh |= (Word)1;
     462   356436175 :     Mh |= hinIsNeg;
     463             :     // This is instruction below written using 'if': if (hin > 0) Ph |= (Word)1;
     464   356436175 :     Ph |= static_cast<Word>((hin + 1) >> 1);
     465             : 
     466   356436175 :     PvOut = Mh | ~(Xv | Ph);
     467   356436175 :     MvOut = Ph & Xv;
     468             : 
     469   356436175 :     return hout;
     470             : }
     471             : 
     472             : /**
     473             :  * Does ceiling division x / y.
     474             :  * Note: x and y must be non-negative and x + y must not overflow.
     475             :  */
     476       19600 : static inline int ceilDiv(const int x, const int y) {
     477       19600 :     return x % y ? x / y + 1 : x / y;
     478             : }
     479             : 
     480    21957225 : static inline int min(const int x, const int y) {
     481    21957225 :     return x < y ? x : y;
     482             : }
     483             : 
     484    21945275 : static inline int max(const int x, const int y) {
     485    21945275 :     return x > y ? x : y;
     486             : }
     487             : 
     488             : 
     489             : /**
     490             :  * @param [in] block
     491             :  * @return Values of cells in block, starting with bottom cell in block.
     492             :  */
     493       28700 : static inline std::vector<int> getBlockCellValues(const Block block) {
     494       28700 :         std::vector<int> scores(WORD_SIZE);
     495       28700 :     int score = block.score;
     496       28700 :     Word mask = HIGH_BIT_MASK;
     497     1836800 :     for (int i = 0; i < WORD_SIZE - 1; i++) {
     498     1808100 :         scores[i] = score;
     499     1808100 :         if (block.P & mask) score--;
     500     1808100 :         if (block.M & mask) score++;
     501     1808100 :         mask >>= 1;
     502             :     }
     503       28700 :     scores[WORD_SIZE - 1] = score;
     504       28700 :     return scores;
     505             : }
     506             : 
     507             : /**
     508             :  * Writes values of cells in block into given array, starting with first/top cell.
     509             :  * @param [in] block
     510             :  * @param [out] dest  Array into which cell values are written. Must have size of at least WORD_SIZE.
     511             :  */
     512        5825 : static inline void readBlock(const Block block, int* const dest) {
     513        5825 :     int score = block.score;
     514        5825 :     Word mask = HIGH_BIT_MASK;
     515      372800 :     for (int i = 0; i < WORD_SIZE - 1; i++) {
     516      366975 :         dest[WORD_SIZE - 1 - i] = score;
     517      366975 :         if (block.P & mask) score--;
     518      366975 :         if (block.M & mask) score++;
     519      366975 :         mask >>= 1;
     520             :     }
     521        5825 :     dest[0] = score;
     522        5825 : }
     523             : 
     524             : /**
     525             :  * Writes values of cells in block into given array, starting with last/bottom cell.
     526             :  * @param [in] block
     527             :  * @param [out] dest  Array into which cell values are written. Must have size of at least WORD_SIZE.
     528             :  */
     529        5825 : static inline void readBlockReverse(const Block block, int* const dest) {
     530        5825 :     int score = block.score;
     531        5825 :     Word mask = HIGH_BIT_MASK;
     532      372800 :     for (int i = 0; i < WORD_SIZE - 1; i++) {
     533      366975 :         dest[i] = score;
     534      366975 :         if (block.P & mask) score--;
     535      366975 :         if (block.M & mask) score++;
     536      366975 :         mask >>= 1;
     537             :     }
     538        5825 :     dest[WORD_SIZE - 1] = score;
     539        5825 : }
     540             : 
     541             : /**
     542             :  * @param [in] block
     543             :  * @param [in] k
     544             :  * @return True if all cells in block have value larger than k, otherwise false.
     545             :  */
     546             : static inline bool allBlockCellsLarger(const Block block, const int k) {
     547             :         std::vector<int> scores = getBlockCellValues(block);
     548             :     for (int i = 0; i < WORD_SIZE; i++) {
     549             :         if (scores[i] <= k) return false;
     550             :     }
     551             :     return true;
     552             : }
     553             : 
     554             : 
     555             : /**
     556             :  * Uses Myers' bit-vector algorithm to find edit distance for global(NW) alignment method.
     557             :  * @param [in] Peq  Query profile.
     558             :  * @param [in] W  Size of padding in last block.
     559             :  *                TODO: Calculate this directly from query, instead of passing it.
     560             :  * @param [in] maxNumBlocks  Number of blocks needed to cover the whole query.
     561             :  *                           TODO: Calculate this directly from query, instead of passing it.
     562             :  * @param [in] queryLength
     563             :  * @param [in] target
     564             :  * @param [in] targetLength
     565             :  * @param [in] k
     566             :  * @param [out] bestScore_  Edit distance.
     567             :  * @param [out] position_  0-indexed position in target at which best score was found.
     568             :  * @param [in] findAlignment  If true, whole matrix is remembered and alignment data is returned.
     569             :  *                            Quadratic amount of memory is consumed.
     570             :  * @param [out] alignData  Data needed for alignment traceback (for reconstruction of alignment).
     571             :  *                         Set only if findAlignment is set to true, otherwise it is NULL.
     572             :  *                         Make sure to free this array using delete[].
     573             :  * @param [out] targetStopPosition  If set to -1, whole calculation is performed normally, as expected.
     574             :  *         If set to p, calculation is performed up to position p in target (inclusive)
     575             :  *         and column p is returned as the only column in alignData.
     576             :  * @return Status.
     577             :  */
     578        6225 : static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int maxNumBlocks,
     579             :                                    const int queryLength,
     580             :                                    const unsigned char* const target, const int targetLength,
     581             :                                    int k, int* const bestScore_,
     582             :                                    int* const position_, const bool findAlignment,
     583             :                                    AlignmentData** const alignData, const int targetStopPosition) {
     584        6225 :     if (targetStopPosition > -1 && findAlignment) {
     585             :         // They can not be both set at the same time!
     586           0 :         return EDLIB_STATUS_ERROR;
     587             :     }
     588             : 
     589             :     // Each STRONG_REDUCE_NUM column is reduced in more expensive way.
     590        6225 :     const int STRONG_REDUCE_NUM = 2048; // TODO: Choose this number dinamically (based on query and target lengths?), so it does not affect speed of computation
     591             : 
     592        6225 :     if (k < abs(targetLength - queryLength)) {
     593         250 :         *bestScore_ = *position_ = -1;
     594         250 :         return EDLIB_STATUS_OK;
     595             :     }
     596             : 
     597        5975 :     k = min(k, max(queryLength, targetLength));  // Upper bound for k
     598             : 
     599             :     // firstBlock is 0-based index of first block in Ukkonen band.
     600             :     // lastBlock is 0-based index of last block in Ukkonen band.
     601        5975 :     int firstBlock = 0;
     602             :     // This is optimal now, by my formula.
     603        5975 :     int lastBlock = min(maxNumBlocks, ceilDiv(min(k, (k + queryLength - targetLength) / 2) + 1, WORD_SIZE)) - 1;
     604             :     Block* bl; // Current block
     605             : 
     606      694100 :     Block* blocks = new Block[maxNumBlocks];
     607             : 
     608             :     // Initialize P, M and score
     609        5975 :     bl = blocks;
     610       20625 :     for (int b = 0; b <= lastBlock; b++) {
     611       14650 :         bl->score = (b + 1) * WORD_SIZE;
     612       14650 :         bl->P = static_cast<Word>(-1); // All 1s
     613       14650 :         bl->M = static_cast<Word>(0);
     614       14650 :         bl++;
     615             :     }
     616             : 
     617             :     // If we want to find alignment, we have to store needed data.
     618        5975 :     if (findAlignment)
     619        2075 :         *alignData = new AlignmentData(maxNumBlocks, targetLength);
     620        3900 :     else if (targetStopPosition > -1)
     621        3250 :         *alignData = new AlignmentData(maxNumBlocks, 1);
     622             :     else
     623         650 :         *alignData = NULL;
     624             : 
     625        5975 :     const unsigned char* targetChar = target;
     626    21940225 :     for (int c = 0; c < targetLength; c++) { // for each column
     627    21937675 :         const Word* Peq_c = Peq + *targetChar * maxNumBlocks;
     628             : 
     629             :         //----------------------- Calculate column -------------------------//
     630    21937675 :         int hout = 1;
     631    21937675 :         bl = blocks + firstBlock;
     632   356552600 :         for (int b = firstBlock; b <= lastBlock; b++) {
     633   334614925 :             hout = calculateBlock(bl->P, bl->M, Peq_c[b], hout, bl->P, bl->M);
     634   334614925 :             bl->score += hout;
     635   334614925 :             bl++;
     636             :         }
     637    21937675 :         bl--;
     638             :         //------------------------------------------------------------------//
     639             :         // bl now points to last block
     640             : 
     641             :         // Update k. I do it only on end of column because it would slow calculation too much otherwise.
     642             :         // NOTICE: I add W when in last block because it is actually result from W cells to the left and W cells up.
     643    21937675 :         k = min(k, bl->score
     644    21937675 :                 + max(targetLength - c - 1, queryLength - ((1 + lastBlock) * WORD_SIZE - 1) - 1)
     645    21937675 :                 + (lastBlock == maxNumBlocks - 1 ? W : 0));
     646             : 
     647             :         //---------- Adjust number of blocks according to Ukkonen ----------//
     648             :         //--- Adjust last block ---//
     649             :         // If block is not beneath band, calculate next block. Only next because others are certainly beneath band.
     650    21937675 :         if (lastBlock + 1 < maxNumBlocks
     651    21821250 :             && !(//score[lastBlock] >= k + WORD_SIZE ||  // NOTICE: this condition could be satisfied if above block also!
     652    21821250 :                  ((lastBlock + 1) * WORD_SIZE - 1
     653    21821250 :                   > k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength))) {
     654    21821250 :             lastBlock++; bl++;
     655    21821250 :             bl->P = static_cast<Word>(-1); // All 1s
     656    21821250 :             bl->M = static_cast<Word>(0);
     657    21821250 :             int newHout = calculateBlock(bl->P, bl->M, Peq_c[lastBlock], hout, bl->P, bl->M);
     658    21821250 :             bl->score = (bl - 1)->score - hout + WORD_SIZE + newHout;
     659    21821250 :             hout = newHout;
     660             :         }
     661             : 
     662             :         // While block is out of band, move one block up.
     663             :         // NOTE: Condition used here is more loose than the one from the article, since I simplified the max() part of it.
     664             :         // I could consider adding that max part, for optimal performance.
     665    21937675 :         while (lastBlock >= firstBlock
     666    43422725 :                && (bl->score >= k + WORD_SIZE
     667    40152450 :                    || ((lastBlock + 1) * WORD_SIZE - 1 >
     668             :                        // TODO: Does not work if do not put +1! Why???
     669    40152450 :                        k - bl->score + 2 * WORD_SIZE - 2 - targetLength + c + queryLength + 1))) {
     670    21485050 :             lastBlock--; bl--;
     671             :         }
     672             :         //-------------------------//
     673             : 
     674             :         //--- Adjust first block ---//
     675             :         // While outside of band, advance block
     676    21937675 :         while (firstBlock <= lastBlock
     677    22274275 :                && (blocks[firstBlock].score >= k + WORD_SIZE
     678    22274100 :                    || ((firstBlock + 1) * WORD_SIZE - 1 <
     679    22274100 :                        blocks[firstBlock].score - k - targetLength + queryLength + c))) {
     680      336600 :             firstBlock++;
     681             :         }
     682             :         //--------------------------/
     683             : 
     684             : 
     685             :         // TODO: consider if this part is useful, it does not seem to help much
     686    21937675 :         if (c % STRONG_REDUCE_NUM == 0) { // Every some columns do more expensive but more efficient reduction
     687       13075 :             while (lastBlock >= firstBlock) {
     688             :                 // If all cells outside of band, remove block
     689       13075 :                 std::vector<int> scores = getBlockCellValues(*bl);
     690       13075 :                 int numCells = lastBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
     691       13075 :                 int r = lastBlock * WORD_SIZE + numCells - 1;
     692       13075 :                 bool reduce = true;
     693      386800 :                 for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
     694             :                     // TODO: Does not work if do not put +1! Why???
     695      386800 :                     if (scores[i] <= k && r <= k - scores[i] - targetLength + c + queryLength + 1) {
     696       13075 :                         reduce = false;
     697       13075 :                         break;
     698             :                     }
     699      373725 :                     r--;
     700             :                 }
     701       13075 :                 if (!reduce) break;
     702           0 :                 lastBlock--; bl--;
     703       13075 :             }
     704             : 
     705       13075 :             while (firstBlock <= lastBlock) {
     706             :                 // If all cells outside of band, remove block
     707       13075 :                 std::vector<int> scores = getBlockCellValues(blocks[firstBlock]);
     708       13075 :                 int numCells = firstBlock == maxNumBlocks - 1 ? WORD_SIZE - W : WORD_SIZE;
     709       13075 :                 int r = firstBlock * WORD_SIZE + numCells - 1;
     710       13075 :                 bool reduce = true;
     711       33000 :                 for (int i = WORD_SIZE - numCells; i < WORD_SIZE; i++) {
     712       33000 :                     if (scores[i] <= k && r >= scores[i] - k - targetLength + c + queryLength) {
     713       13075 :                         reduce = false;
     714       13075 :                         break;
     715             :                     }
     716       19925 :                     r--;
     717             :                 }
     718       13075 :                 if (!reduce) break;
     719           0 :                 firstBlock++;
     720       13075 :             }
     721             :         }
     722             : 
     723             : 
     724             :         // If band stops to exist finish
     725    21937675 :         if (lastBlock < firstBlock) {
     726         175 :             *bestScore_ = *position_ = -1;
     727         175 :             delete[] blocks;
     728         175 :             return EDLIB_STATUS_OK;
     729             :         }
     730             :         //------------------------------------------------------------------//
     731             : 
     732             : 
     733             :         //---- Save column so it can be used for reconstruction ----//
     734    21937500 :         if (findAlignment && c < targetLength) {
     735     2574900 :             bl = blocks + firstBlock;
     736     6254650 :             for (int b = firstBlock; b <= lastBlock; b++) {
     737     3679750 :                 (*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
     738     3679750 :                 (*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
     739     3679750 :                 (*alignData)->scores[maxNumBlocks * c + b] = bl->score;
     740     3679750 :                 bl++;
     741             :             }
     742     2574900 :             (*alignData)->firstBlocks[c] = firstBlock;
     743     2574900 :             (*alignData)->lastBlocks[c] = lastBlock;
     744             :         }
     745             :         //----------------------------------------------------------//
     746             :         //---- If this is stop column, save it and finish ----//
     747    21937500 :         if (c == targetStopPosition) {
     748       14900 :             for (int b = firstBlock; b <= lastBlock; b++) {
     749       11650 :                 (*alignData)->Ps[b] = (blocks + b)->P;
     750       11650 :                 (*alignData)->Ms[b] = (blocks + b)->M;
     751       11650 :                 (*alignData)->scores[b] = (blocks + b)->score;
     752             :             }
     753        3250 :             (*alignData)->firstBlocks[0] = firstBlock;
     754        3250 :             (*alignData)->lastBlocks[0] = lastBlock;
     755        3250 :             *bestScore_ = -1;
     756        3250 :             *position_ = targetStopPosition;
     757        3250 :             delete[] blocks;
     758        3250 :             return EDLIB_STATUS_OK;
     759             :         }
     760             :         //----------------------------------------------------//
     761             : 
     762    21934250 :         targetChar++;
     763             :     }
     764             : 
     765        2550 :     if (lastBlock == maxNumBlocks - 1) { // If last block of last column was calculated
     766             :         // Obtain best score from block -> it is complicated because query is padded with W cells
     767        2550 :         int bestScore = getBlockCellValues(blocks[lastBlock])[W];
     768        2550 :         if (bestScore <= k) {
     769        2525 :             *bestScore_ = bestScore;
     770        2525 :             *position_ = targetLength - 1;
     771        2525 :             delete[] blocks;
     772        2525 :             return EDLIB_STATUS_OK;
     773             :         }
     774             :     }
     775             : 
     776          25 :     *bestScore_ = *position_ = -1;
     777          25 :     delete[] blocks;
     778          25 :     return EDLIB_STATUS_OK;
     779             : }
     780             : 
     781             : 
     782             : /**
     783             :  * Finds one possible alignment that gives optimal score by moving back through the dynamic programming matrix,
     784             :  * that is stored in alignData. Consumes large amount of memory: O(queryLength * targetLength).
     785             :  * @param [in] queryLength  Normal length, without W.
     786             :  * @param [in] targetLength  Normal length, without W.
     787             :  * @param [in] bestScore  Best score.
     788             :  * @param [in] alignData  Data obtained during finding best score that is useful for finding alignment.
     789             :  * @param [out] alignment  Alignment.
     790             :  * @param [out] alignmentLength  Length of alignment.
     791             :  * @return Status code.
     792             :  */
     793        2075 : static int obtainAlignmentTraceback(const int queryLength, const int targetLength,
     794             :                                     const int bestScore, const AlignmentData* const alignData,
     795             :                                     unsigned char** const alignment, int* const alignmentLength) {
     796        2075 :     const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
     797        2075 :     const int W = maxNumBlocks * WORD_SIZE - queryLength;
     798             : 
     799        2075 :     *alignment = static_cast<unsigned char*>(malloc((queryLength + targetLength - 1) * sizeof(unsigned char)));
     800        2075 :     *alignmentLength = 0;
     801        2075 :     int c = targetLength - 1; // index of column
     802        2075 :     int b = maxNumBlocks - 1; // index of block in column
     803        2075 :     int currScore = bestScore; // Score of current cell
     804        2075 :     int lScore  = -1; // Score of left cell
     805        2075 :     int uScore  = -1; // Score of upper cell
     806        2075 :     int ulScore = -1; // Score of upper left cell
     807        2075 :     Word currP = alignData->Ps[c * maxNumBlocks + b]; // P of current block
     808        2075 :     Word currM = alignData->Ms[c * maxNumBlocks + b]; // M of current block
     809             :     // True if block to left exists and is in band
     810        2075 :     bool thereIsLeftBlock = c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1];
     811             :     // We set initial values of lP and lM to 0 only to avoid compiler warnings, they should not affect the
     812             :     // calculation as both lP and lM should be initialized at some moment later (but compiler can not
     813             :     // detect it since this initialization is guaranteed by "business" logic).
     814        2075 :     Word lP = 0, lM = 0;
     815        2075 :     if (thereIsLeftBlock) {
     816        2075 :         lP = alignData->Ps[(c - 1) * maxNumBlocks + b]; // P of block to the left
     817        2075 :         lM = alignData->Ms[(c - 1) * maxNumBlocks + b]; // M of block to the left
     818             :     }
     819        2075 :     currP <<= W;
     820        2075 :     currM <<= W;
     821        2075 :     int blockPos = WORD_SIZE - W - 1; // 0 based index of current cell in blockPos
     822             : 
     823             :     // TODO(martin): refactor this whole piece of code. There are too many if-else statements,
     824             :     // it is too easy for a bug to hide and to hard to effectively cover all the edge-cases.
     825             :     // We need better separation of logic and responsibilities.
     826             :     while (true) {
     827     2625825 :         if (c == 0) {
     828        2000 :             thereIsLeftBlock = true;
     829        2000 :             lScore = b * WORD_SIZE + blockPos + 1;
     830        2000 :             ulScore = lScore - 1;
     831             :         }
     832             : 
     833             :         // TODO: improvement: calculate only those cells that are needed,
     834             :         //       for example if I calculate upper cell and can move up,
     835             :         //       there is no need to calculate left and upper left cell
     836             :         //---------- Calculate scores ---------//
     837     2625825 :         if (lScore == -1 && thereIsLeftBlock) {
     838     2571800 :             lScore = alignData->scores[(c - 1) * maxNumBlocks + b]; // score of block to the left
     839    84554900 :             for (int i = 0; i < WORD_SIZE - blockPos - 1; i++) {
     840    81983100 :                 if (lP & HIGH_BIT_MASK) lScore--;
     841    81983100 :                 if (lM & HIGH_BIT_MASK) lScore++;
     842    81983100 :                 lP <<= 1;
     843    81983100 :                 lM <<= 1;
     844             :             }
     845             :         }
     846     2625825 :         if (ulScore == -1) {
     847     2623825 :             if (lScore != -1) {
     848     2622950 :                 ulScore = lScore;
     849     2622950 :                 if (lP & HIGH_BIT_MASK) ulScore--;
     850     2622950 :                 if (lM & HIGH_BIT_MASK) ulScore++;
     851             :             }
     852         875 :             else if (c > 0 && b-1 >= alignData->firstBlocks[c-1] && b-1 <= alignData->lastBlocks[c-1]) {
     853             :                 // This is the case when upper left cell is last cell in block,
     854             :                 // and block to left is not in band so lScore is -1.
     855         875 :                 ulScore = alignData->scores[(c - 1) * maxNumBlocks + b - 1];
     856             :             }
     857             :         }
     858     2625825 :         if (uScore == -1) {
     859     2568525 :             uScore = currScore;
     860     2568525 :             if (currP & HIGH_BIT_MASK) uScore--;
     861     2568525 :             if (currM & HIGH_BIT_MASK) uScore++;
     862     2568525 :             currP <<= 1;
     863     2568525 :             currM <<= 1;
     864             :         }
     865             :         //-------------------------------------//
     866             : 
     867             :         // TODO: should I check if there is upper block?
     868             : 
     869             :         //-------------- Move --------------//
     870             :         // Move up - insertion to target - deletion from query
     871     2625825 :         if (uScore != -1 && uScore + 1 == currScore) {
     872       51225 :             currScore = uScore;
     873       51225 :             lScore = ulScore;
     874       51225 :             uScore = ulScore = -1;
     875       51225 :             if (blockPos == 0) { // If entering new (upper) block
     876         950 :                 if (b == 0) { // If there are no cells above (only boundary cells)
     877           0 :                     (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT; // Move up
     878           0 :                     for (int i = 0; i < c + 1; i++) // Move left until end
     879           0 :                         (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
     880           0 :                     break;
     881             :                 } else {
     882         950 :                     blockPos = WORD_SIZE - 1;
     883         950 :                     b--;
     884         950 :                     currP = alignData->Ps[c * maxNumBlocks + b];
     885         950 :                     currM = alignData->Ms[c * maxNumBlocks + b];
     886         950 :                     if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
     887         950 :                         thereIsLeftBlock = true;
     888         950 :                         lP = alignData->Ps[(c - 1) * maxNumBlocks + b]; // TODO: improve this, too many operations
     889         950 :                         lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
     890             :                     } else {
     891           0 :                         thereIsLeftBlock = false;
     892             :                         // TODO(martin): There may not be left block, but there can be left boundary - do we
     893             :                         // handle this correctly then? Are l and ul score set correctly? I should check that / refactor this.
     894             :                     }
     895             :                 }
     896             :             } else {
     897       50275 :                 blockPos--;
     898       50275 :                 lP <<= 1;
     899       50275 :                 lM <<= 1;
     900             :             }
     901             :             // Mark move
     902       51225 :             (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
     903       51225 :         }
     904             :         // Move left - deletion from target - insertion to query
     905     2574600 :         else if (lScore != -1 && lScore + 1 == currScore) {
     906       57300 :             currScore = lScore;
     907       57300 :             uScore = ulScore;
     908       57300 :             lScore = ulScore = -1;
     909       57300 :             c--;
     910       57300 :             if (c == -1) { // If there are no cells to the left (only boundary cells)
     911           0 :                 (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE; // Move left
     912           0 :                 int numUp = b * WORD_SIZE + blockPos + 1;
     913           0 :                 for (int i = 0; i < numUp; i++) // Move up until end
     914           0 :                     (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
     915           0 :                 break;
     916             :             }
     917       57300 :             currP = lP;
     918       57300 :             currM = lM;
     919       57300 :             if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
     920       57300 :                 thereIsLeftBlock = true;
     921       57300 :                 lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
     922       57300 :                 lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
     923             :             } else {
     924           0 :                 if (c == 0) { // If there are no cells to the left (only boundary cells)
     925           0 :                     thereIsLeftBlock = true;
     926           0 :                     lScore = b * WORD_SIZE + blockPos + 1;
     927           0 :                     ulScore = lScore - 1;
     928             :                 } else {
     929           0 :                     thereIsLeftBlock = false;
     930             :                 }
     931             :             }
     932             :             // Mark move
     933       57300 :             (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
     934       57300 :         }
     935             :         // Move up left - (mis)match
     936     2517300 :         else if (ulScore != -1) {
     937     2517300 :             unsigned char moveCode = ulScore == currScore ? EDLIB_EDOP_MATCH : EDLIB_EDOP_MISMATCH;
     938     2517300 :             currScore = ulScore;
     939     2517300 :             uScore = lScore = ulScore = -1;
     940     2517300 :             c--;
     941     2517300 :             if (c == -1) { // If there are no cells to the left (only boundary cells)
     942        1925 :                 (*alignment)[(*alignmentLength)++] = moveCode; // Move left
     943        1925 :                 int numUp = b * WORD_SIZE + blockPos;
     944        2000 :                 for (int i = 0; i < numUp; i++) // Move up until end
     945          75 :                     (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_INSERT;
     946        1925 :                 break;
     947             :             }
     948     2515375 :             if (blockPos == 0) { // If entering upper left block
     949       38500 :                 if (b == 0) { // If there are no more cells above (only boundary cells)
     950         150 :                     (*alignment)[(*alignmentLength)++] = moveCode; // Move up left
     951         450 :                     for (int i = 0; i < c + 1; i++) // Move left until end
     952         300 :                         (*alignment)[(*alignmentLength)++] = EDLIB_EDOP_DELETE;
     953         150 :                     break;
     954             :                 }
     955       38350 :                 blockPos = WORD_SIZE - 1;
     956       38350 :                 b--;
     957       38350 :                 currP = alignData->Ps[c * maxNumBlocks + b];
     958       38350 :                 currM = alignData->Ms[c * maxNumBlocks + b];
     959             :             } else { // If entering left block
     960     2476875 :                 blockPos--;
     961     2476875 :                 currP = lP;
     962     2476875 :                 currM = lM;
     963     2476875 :                 currP <<= 1;
     964     2476875 :                 currM <<= 1;
     965             :             }
     966             :             // Set new left block
     967     2515225 :             if (c > 0 && b >= alignData->firstBlocks[c-1] && b <= alignData->lastBlocks[c-1]) {
     968     2512425 :                 thereIsLeftBlock = true;
     969     2512425 :                 lP = alignData->Ps[(c - 1) * maxNumBlocks + b];
     970     2512425 :                 lM = alignData->Ms[(c - 1) * maxNumBlocks + b];
     971             :             } else {
     972        2800 :                 if (c == 0) { // If there are no cells to the left (only boundary cells)
     973        1925 :                     thereIsLeftBlock = true;
     974        1925 :                     lScore = b * WORD_SIZE + blockPos + 1;
     975        1925 :                     ulScore = lScore - 1;
     976             :                 } else {
     977         875 :                     thereIsLeftBlock = false;
     978             :                 }
     979             :             }
     980             :             // Mark move
     981     2515225 :             (*alignment)[(*alignmentLength)++] = moveCode;
     982             :         } else {
     983             :             // Reached end - finished!
     984           0 :             break;
     985             :         }
     986             :         //----------------------------------//
     987     2623750 :     }
     988             : 
     989        2075 :     *alignment = static_cast<unsigned char*>(realloc(*alignment, (*alignmentLength) * sizeof(unsigned char)));
     990        2075 :     std::reverse(*alignment, *alignment + (*alignmentLength));
     991        2075 :     return EDLIB_STATUS_OK;
     992             : }
     993             : 
     994             : 
     995             : /**
     996             :  * Finds one possible alignment that gives optimal score (bestScore).
     997             :  * It will split problem into smaller problems using Hirschberg's algorithm and when they are small enough,
     998             :  * it will solve them using traceback algorithm.
     999             :  * @param [in] query
    1000             :  * @param [in] rQuery  Reversed query.
    1001             :  * @param [in] queryLength
    1002             :  * @param [in] target
    1003             :  * @param [in] rTarget  Reversed target.
    1004             :  * @param [in] targetLength
    1005             :  * @param [in] equalityDefinition
    1006             :  * @param [in] alphabetLength
    1007             :  * @param [in] bestScore  Best(optimal) score.
    1008             :  * @param [out] alignment  Sequence of edit operations that make target equal to query.
    1009             :  * @param [out] alignmentLength  Length of alignment.
    1010             :  * @return Status code.
    1011             :  */
    1012        3700 : static int obtainAlignment(
    1013             :         const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
    1014             :         const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
    1015             :         const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
    1016             :         unsigned char** const alignment, int* const alignmentLength) {
    1017             : 
    1018             :     // Handle special case when one of sequences has length of 0.
    1019        3700 :     if (queryLength == 0 || targetLength == 0) {
    1020           0 :         *alignmentLength = targetLength + queryLength;
    1021           0 :         *alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
    1022           0 :         for (int i = 0; i < *alignmentLength; i++) {
    1023           0 :             (*alignment)[i] = queryLength == 0 ? EDLIB_EDOP_DELETE : EDLIB_EDOP_INSERT;
    1024             :         }
    1025           0 :         return EDLIB_STATUS_OK;
    1026             :     }
    1027             : 
    1028        3700 :     const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
    1029        3700 :     const int W = maxNumBlocks * WORD_SIZE - queryLength;
    1030             :     int statusCode;
    1031             : 
    1032             :     // TODO: think about reducing number of memory allocations in alignment functions, probably
    1033             :     // by sharing some memory that is allocated only once. That refers to: Peq, columns in Hirschberg,
    1034             :     // and it could also be done for alignments - we could have one big array for alignment that would be
    1035             :     // sparsely populated by each of steps in recursion, and at the end we would just consolidate those results.
    1036             : 
    1037             :     // If estimated memory consumption for traceback algorithm is smaller than 1MB use it,
    1038             :     // otherwise use Hirschberg's algorithm. By running few tests I choose boundary of 1MB as optimal.
    1039        3700 :     long long alignmentDataSize = (2ll * sizeof(Word) + sizeof(int)) * maxNumBlocks * targetLength
    1040        3700 :         + 2ll * sizeof(int) * targetLength;
    1041        3700 :     if (alignmentDataSize < 1024 * 1024) {
    1042             :         int score_, endLocation_;  // Used only to call function.
    1043        2075 :         AlignmentData* alignData = NULL;
    1044        2075 :         Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
    1045        2075 :         myersCalcEditDistanceNW(Peq, W, maxNumBlocks,
    1046             :                                 queryLength,
    1047             :                                 target, targetLength,
    1048             :                                 bestScore,
    1049             :                                 &score_, &endLocation_, true, &alignData, -1);
    1050             :         //assert(score_ == bestScore);
    1051             :         //assert(endLocation_ == targetLength - 1);
    1052             : 
    1053        2075 :         statusCode = obtainAlignmentTraceback(queryLength, targetLength,
    1054             :                                               bestScore, alignData, alignment, alignmentLength);
    1055        2075 :         delete alignData;
    1056        2075 :         delete[] Peq;
    1057             :     } else {
    1058        1625 :         statusCode = obtainAlignmentHirschberg(query, rQuery, queryLength,
    1059             :                                                target, rTarget, targetLength,
    1060             :                                                equalityDefinition, alphabetLength, bestScore,
    1061             :                                                alignment, alignmentLength);
    1062             :     }
    1063        3700 :     return statusCode;
    1064             : }
    1065             : 
    1066             : 
    1067             : /**
    1068             :  * Finds one possible alignment that gives optimal score (bestScore).
    1069             :  * Uses Hirschberg's algorithm to split problem into two sub-problems, solve them and combine them together.
    1070             :  * @param [in] query
    1071             :  * @param [in] rQuery  Reversed query.
    1072             :  * @param [in] queryLength
    1073             :  * @param [in] target
    1074             :  * @param [in] rTarget  Reversed target.
    1075             :  * @param [in] targetLength
    1076             :  * @param [in] alphabetLength
    1077             :  * @param [in] bestScore  Best(optimal) score.
    1078             :  * @param [out] alignment  Sequence of edit operations that make target equal to query.
    1079             :  * @param [out] alignmentLength  Length of alignment.
    1080             :  * @return Status code.
    1081             :  */
    1082        1625 : static int obtainAlignmentHirschberg(
    1083             :         const unsigned char* const query, const unsigned char* const rQuery, const int queryLength,
    1084             :         const unsigned char* const target, const unsigned char* const rTarget, const int targetLength,
    1085             :         const EqualityDefinition& equalityDefinition, const int alphabetLength, const int bestScore,
    1086             :         unsigned char** const alignment, int* const alignmentLength) {
    1087             : 
    1088        1625 :     const int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE);
    1089        1625 :     const int W = maxNumBlocks * WORD_SIZE - queryLength;
    1090             : 
    1091        1625 :     Word* Peq = buildPeq(alphabetLength, query, queryLength, equalityDefinition);
    1092        1625 :     Word* rPeq = buildPeq(alphabetLength, rQuery, queryLength, equalityDefinition);
    1093             : 
    1094             :     // Used only to call functions.
    1095             :     int score_, endLocation_;
    1096             : 
    1097             :     // Divide dynamic matrix into two halfs, left and right.
    1098        1625 :     const int leftHalfWidth = targetLength / 2;
    1099        1625 :     const int rightHalfWidth = targetLength - leftHalfWidth;
    1100             : 
    1101             :     // Calculate left half.
    1102        1625 :     AlignmentData* alignDataLeftHalf = NULL;
    1103        1625 :     int leftHalfCalcStatus = myersCalcEditDistanceNW(
    1104             :             Peq, W, maxNumBlocks, queryLength, target, targetLength, bestScore,
    1105             :             &score_, &endLocation_, false, &alignDataLeftHalf, leftHalfWidth - 1);
    1106             : 
    1107             :     // Calculate right half.
    1108        1625 :     AlignmentData* alignDataRightHalf = NULL;
    1109        1625 :     int rightHalfCalcStatus = myersCalcEditDistanceNW(
    1110             :             rPeq, W, maxNumBlocks, queryLength, rTarget, targetLength, bestScore,
    1111             :             &score_, &endLocation_, false, &alignDataRightHalf, rightHalfWidth - 1);
    1112             : 
    1113        1625 :     delete[] Peq;
    1114        1625 :     delete[] rPeq;
    1115             : 
    1116        1625 :     if (leftHalfCalcStatus == EDLIB_STATUS_ERROR || rightHalfCalcStatus == EDLIB_STATUS_ERROR) {
    1117           0 :         if (alignDataLeftHalf) delete alignDataLeftHalf;
    1118           0 :         if (alignDataRightHalf) delete alignDataRightHalf;
    1119           0 :         return EDLIB_STATUS_ERROR;
    1120             :     }
    1121             : 
    1122             :     // Unwrap the left half.
    1123        1625 :     int firstBlockIdxLeft = alignDataLeftHalf->firstBlocks[0];
    1124        1625 :     int lastBlockIdxLeft = alignDataLeftHalf->lastBlocks[0];
    1125             :     // TODO: avoid this allocation by using some shared array?
    1126             :     // scoresLeft contains scores from left column, starting with scoresLeftStartIdx row (query index)
    1127             :     // and ending with scoresLeftEndIdx row (0-indexed).
    1128        1625 :     int scoresLeftLength = (lastBlockIdxLeft - firstBlockIdxLeft + 1) * WORD_SIZE;
    1129        1625 :     int* scoresLeft = new int[scoresLeftLength];
    1130        7450 :     for (int blockIdx = firstBlockIdxLeft; blockIdx <= lastBlockIdxLeft; blockIdx++) {
    1131        5825 :         Block block(alignDataLeftHalf->Ps[blockIdx], alignDataLeftHalf->Ms[blockIdx],
    1132        5825 :                     alignDataLeftHalf->scores[blockIdx]);
    1133        5825 :         readBlock(block, scoresLeft + (blockIdx - firstBlockIdxLeft) * WORD_SIZE);
    1134             :     }
    1135        1625 :     int scoresLeftStartIdx = firstBlockIdxLeft * WORD_SIZE;
    1136             :     // If last block contains padding, shorten the length of scores for the length of padding.
    1137        1625 :     if (lastBlockIdxLeft == maxNumBlocks - 1) {
    1138           0 :         scoresLeftLength -= W;
    1139             :     }
    1140             : 
    1141             :     // Unwrap the right half (I also reverse it while unwraping).
    1142        1625 :     int firstBlockIdxRight = alignDataRightHalf->firstBlocks[0];
    1143        1625 :     int lastBlockIdxRight = alignDataRightHalf->lastBlocks[0];
    1144        1625 :     int scoresRightLength = (lastBlockIdxRight - firstBlockIdxRight + 1) * WORD_SIZE;
    1145        1625 :     int* scoresRight = new int[scoresRightLength];
    1146        1625 :     int* scoresRightOriginalStart = scoresRight;
    1147        7450 :     for (int blockIdx = firstBlockIdxRight; blockIdx <= lastBlockIdxRight; blockIdx++) {
    1148        5825 :         Block block(alignDataRightHalf->Ps[blockIdx], alignDataRightHalf->Ms[blockIdx],
    1149        5825 :                     alignDataRightHalf->scores[blockIdx]);
    1150        5825 :         readBlockReverse(block, scoresRight + (lastBlockIdxRight - blockIdx) * WORD_SIZE);
    1151             :     }
    1152        1625 :     int scoresRightStartIdx = queryLength - (lastBlockIdxRight + 1) * WORD_SIZE;
    1153             :     // If there is padding at the beginning of scoresRight (that can happen because of reversing that we do),
    1154             :     // move pointer forward to remove the padding (that is why we remember originalStart).
    1155        1625 :     if (scoresRightStartIdx < 0) {
    1156             :         //assert(scoresRightStartIdx == -1 * W);
    1157           0 :         scoresRight += W;
    1158           0 :         scoresRightStartIdx += W;
    1159           0 :         scoresRightLength -= W;
    1160             :     }
    1161             : 
    1162        1625 :     delete alignDataLeftHalf;
    1163        1625 :     delete alignDataRightHalf;
    1164             : 
    1165             :     //--------------------- Find the best move ----------------//
    1166             :     // Find the query/row index of cell in left column which together with its lower right neighbour
    1167             :     // from right column gives the best score (when summed). We also have to consider boundary cells
    1168             :     // (those cells at -1 indexes).
    1169             :     //  x|
    1170             :     //  -+-
    1171             :     //   |x
    1172        1625 :     int queryIdxLeftStart = max(scoresLeftStartIdx, scoresRightStartIdx - 1);
    1173        1625 :     int queryIdxLeftEnd = min(scoresLeftStartIdx + scoresLeftLength - 1,
    1174        1625 :                           scoresRightStartIdx + scoresRightLength - 2);
    1175        1625 :     int leftScore = -1, rightScore = -1;
    1176        1625 :     int queryIdxLeftAlignment = -1;  // Query/row index of cell in left column where alignment is passing through.
    1177        1625 :     bool queryIdxLeftAlignmentFound = false;
    1178      161350 :     for (int queryIdx = queryIdxLeftStart; queryIdx <= queryIdxLeftEnd; queryIdx++) {
    1179      161350 :         leftScore = scoresLeft[queryIdx - scoresLeftStartIdx];
    1180      161350 :         rightScore = scoresRight[queryIdx + 1 - scoresRightStartIdx];
    1181      161350 :         if (leftScore + rightScore == bestScore) {
    1182        1625 :             queryIdxLeftAlignment = queryIdx;
    1183        1625 :             queryIdxLeftAlignmentFound = true;
    1184        1625 :             break;
    1185             :         }
    1186             :     }
    1187             :     // Check boundary cells.
    1188        1625 :     if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx == 0 && scoresRightStartIdx == 0) {
    1189           0 :         leftScore = leftHalfWidth;
    1190           0 :         rightScore = scoresRight[0];
    1191           0 :         if (leftScore + rightScore == bestScore) {
    1192           0 :             queryIdxLeftAlignment = -1;
    1193           0 :             queryIdxLeftAlignmentFound = true;
    1194             :         }
    1195             :     }
    1196        1625 :     if (!queryIdxLeftAlignmentFound && scoresLeftStartIdx + scoresLeftLength == queryLength
    1197           0 :         && scoresRightStartIdx + scoresRightLength == queryLength) {
    1198           0 :         leftScore = scoresLeft[scoresLeftLength - 1];
    1199           0 :         rightScore = rightHalfWidth;
    1200           0 :         if (leftScore + rightScore == bestScore) {
    1201           0 :             queryIdxLeftAlignment = queryLength - 1;
    1202           0 :             queryIdxLeftAlignmentFound = true;
    1203             :         }
    1204             :     }
    1205             : 
    1206        1625 :     delete[] scoresLeft;
    1207        1625 :     delete[] scoresRightOriginalStart;
    1208             : 
    1209        1625 :     if (queryIdxLeftAlignmentFound == false) {
    1210             :         // If there was no move that is part of optimal alignment, then there is no such alignment
    1211             :         // or given bestScore is not correct!
    1212           0 :         return EDLIB_STATUS_ERROR;
    1213             :     }
    1214             :     //----------------------------------------------------------//
    1215             : 
    1216             :     // Calculate alignments for upper half of left half (upper left - ul)
    1217             :     // and lower half of right half (lower right - lr).
    1218        1625 :     const int ulHeight = queryIdxLeftAlignment + 1;
    1219        1625 :     const int lrHeight = queryLength - ulHeight;
    1220        1625 :     const int ulWidth = leftHalfWidth;
    1221        1625 :     const int lrWidth = rightHalfWidth;
    1222        1625 :     unsigned char* ulAlignment = NULL; int ulAlignmentLength;
    1223        3250 :     int ulStatusCode = obtainAlignment(query, rQuery + lrHeight, ulHeight,
    1224        1625 :                                        target, rTarget + lrWidth, ulWidth,
    1225             :                                        equalityDefinition, alphabetLength, leftScore,
    1226             :                                        &ulAlignment, &ulAlignmentLength);
    1227        1625 :     unsigned char* lrAlignment = NULL; int lrAlignmentLength;
    1228        3250 :     int lrStatusCode = obtainAlignment(query + ulHeight, rQuery, lrHeight,
    1229        1625 :                                        target + ulWidth, rTarget, lrWidth,
    1230             :                                        equalityDefinition, alphabetLength, rightScore,
    1231             :                                        &lrAlignment, &lrAlignmentLength);
    1232        1625 :     if (ulStatusCode == EDLIB_STATUS_ERROR || lrStatusCode == EDLIB_STATUS_ERROR) {
    1233           0 :         if (ulAlignment) free(ulAlignment);
    1234           0 :         if (lrAlignment) free(lrAlignment);
    1235           0 :         return EDLIB_STATUS_ERROR;
    1236             :     }
    1237             : 
    1238             :     // Build alignment by concatenating upper left alignment with lower right alignment.
    1239        1625 :     *alignmentLength = ulAlignmentLength + lrAlignmentLength;
    1240        1625 :     *alignment = static_cast<unsigned char*>(malloc((*alignmentLength) * sizeof(unsigned char)));
    1241        1625 :     memcpy(*alignment, ulAlignment, ulAlignmentLength);
    1242        1625 :     memcpy(*alignment + ulAlignmentLength, lrAlignment, lrAlignmentLength);
    1243             : 
    1244        1625 :     free(ulAlignment);
    1245        1625 :     free(lrAlignment);
    1246        1625 :     return EDLIB_STATUS_OK;
    1247             : }
    1248             : 
    1249             : 
    1250             : /**
    1251             :  * Takes char query and char target, recognizes alphabet and transforms them into unsigned char sequences
    1252             :  * where elements in sequences are not any more letters of alphabet, but their index in alphabet.
    1253             :  * Most of internal edlib functions expect such transformed sequences.
    1254             :  * This function will allocate queryTransformed and targetTransformed, so make sure to free them when done.
    1255             :  * Example:
    1256             :  *   Original sequences: "ACT" and "CGT".
    1257             :  *   Alphabet would be recognized as "ACTG". Alphabet length = 4.
    1258             :  *   Transformed sequences: [0, 1, 2] and [1, 3, 2].
    1259             :  * @param [in] queryOriginal
    1260             :  * @param [in] queryLength
    1261             :  * @param [in] targetOriginal
    1262             :  * @param [in] targetLength
    1263             :  * @param [out] queryTransformed  It will contain values in range [0, alphabet length - 1].
    1264             :  * @param [out] targetTransformed  It will contain values in range [0, alphabet length - 1].
    1265             :  * @return  Alphabet as a string of unique characters, where index of each character is its value in transformed
    1266             :  *          sequences.
    1267             :  */
    1268         500 : static std::string transformSequences(const char* const queryOriginal, const int queryLength,
    1269             :                                  const char* const targetOriginal, const int targetLength,
    1270             :                                  unsigned char** const queryTransformed,
    1271             :                                  unsigned char** const targetTransformed) {
    1272             :     // Alphabet is constructed from letters that are present in sequences.
    1273             :     // Each letter is assigned an ordinal number, starting from 0 up to alphabetLength - 1,
    1274             :     // and new query and target are created in which letters are replaced with their ordinal numbers.
    1275             :     // This query and target are used in all the calculations later.
    1276         500 :     *queryTransformed = static_cast<unsigned char *>(malloc(sizeof(unsigned char) * queryLength));
    1277         500 :     *targetTransformed = static_cast<unsigned char *>(malloc(sizeof(unsigned char) * targetLength));
    1278             : 
    1279         500 :     std::string alphabet = "";
    1280             : 
    1281             :     // Alphabet information, it is constructed on fly while transforming sequences.
    1282             :     // letterIdx[c] is index of letter c in alphabet.
    1283             :     unsigned char letterIdx[MAX_UCHAR + 1];
    1284             :     bool inAlphabet[MAX_UCHAR + 1]; // inAlphabet[c] is true if c is in alphabet
    1285      128500 :     for (int i = 0; i < MAX_UCHAR + 1; i++) inAlphabet[i] = false;
    1286             : 
    1287     2582775 :     for (int i = 0; i < queryLength; i++) {
    1288     2582275 :         unsigned char c = static_cast<unsigned char>(queryOriginal[i]);
    1289     2582275 :         if (!inAlphabet[c]) {
    1290        7625 :             inAlphabet[c] = true;
    1291        7625 :             letterIdx[c] = static_cast<unsigned char>(alphabet.size());
    1292        7625 :             alphabet += queryOriginal[i];
    1293             :         }
    1294     2582275 :         (*queryTransformed)[i] = letterIdx[c];
    1295             :     }
    1296     2589075 :     for (int i = 0; i < targetLength; i++) {
    1297     2588575 :         unsigned char c = static_cast<unsigned char>(targetOriginal[i]);
    1298     2588575 :         if (!inAlphabet[c]) {
    1299        1125 :             inAlphabet[c] = true;
    1300        1125 :             letterIdx[c] = static_cast<unsigned char>(alphabet.size());
    1301        1125 :             alphabet += targetOriginal[i];
    1302             :         }
    1303     2588575 :         (*targetTransformed)[i] = letterIdx[c];
    1304             :     }
    1305             : 
    1306        1000 :     return alphabet;
    1307           0 : }
    1308             : 
    1309             : 
    1310             : class MemPoolHolder {
    1311             : public:
    1312          25 :         MemPoolHolder() {
    1313          25 :                 memory::pool::initialize();
    1314          25 :         }
    1315             : 
    1316          25 :         ~MemPoolHolder() {
    1317          25 :                 if (pool) {
    1318          25 :                         memory::pool::destroy(pool);
    1319             :                 }
    1320          25 :                 memory::pool::terminate();
    1321          25 :         }
    1322             : 
    1323         500 :         memory::pool_t * getPool() {
    1324         500 :                 if (!pool) {
    1325          25 :                         pool = memory::pool::create((memory::pool_t *)nullptr);
    1326             :                 }
    1327         500 :                 return pool;
    1328             :         }
    1329             : 
    1330             : private:
    1331             :         memory::pool_t *pool = nullptr;
    1332             : };
    1333             : 
    1334             : thread_local MemPoolHolder tl_pool;
    1335             : 
    1336         500 : static void edlibFreeAlignResult(EdlibAlignResult &result) {
    1337         500 :     if (result.endLocations) free(result.endLocations);
    1338         500 :     if (result.startLocations) free(result.startLocations);
    1339         500 :     if (result.alignment) free(result.alignment);
    1340         500 : }
    1341             : 
    1342         500 : Distance::Distance(const StringView &origin, const StringView &canonical, size_t maxDistance) {
    1343         500 :         auto orig = memory::pool::acquire();
    1344             : 
    1345         500 :         auto pool = tl_pool.getPool();
    1346         500 :         memory::pool::push(pool);
    1347             : 
    1348             :         EdlibAlignConfig cfg;
    1349         500 :         cfg.k = (maxDistance == maxOf<size_t>()) ? -1 : int(maxDistance);
    1350         500 :         cfg.mode = EDLIB_MODE_NW;
    1351         500 :         cfg.task = EDLIB_TASK_PATH;
    1352             : 
    1353         500 :         auto res = edlibAlign(origin.data(), origin.size(), canonical.data(), canonical.size(), cfg);
    1354             : 
    1355         500 :         memory::pool::push(orig);
    1356         500 :         _distance = res.editDistance;
    1357         500 :         _storage.reserve(res.alignmentLength);
    1358     2626700 :         for (int i = 0; i < res.alignmentLength; ++ i) {
    1359     2626200 :                 _storage.emplace_back(Value(res.alignment[i]));
    1360             :         }
    1361         500 :         memory::pool::pop();
    1362             : 
    1363         500 :         edlibFreeAlignResult(res);
    1364             : 
    1365         500 :         memory::pool::clear(pool);
    1366         500 :         memory::pool::pop();
    1367         500 : }
    1368             : 
    1369             : }

Generated by: LCOV version 1.14