Re: mummer patches
Hi Sascha,
On 05.08.2016 16:27, Sascha Steinbiss wrote:
>> So I finally got round to submit my patches for MUMmer. One can be found
>> in the repo, half of the other is attached to this mail. Unfortunately,
>> the other half got lost in a code reindentation. Applying the changes
>> requires refactoring src/tigr/sw_align.cc for the changes in the header.
>
> Not really sure I understand what you mean... the patches in the repo's
> series all apply without a problem and the one attached to your mail
> doesn't seem to do a lot. Are you sure it is complete? Or where is the
> other half one would be required to refactor?
Right, the patch for sw_align.hh changes some structures and now the
algorithms in sw_align.cc have to be adapted, too. The attached patch
may give you an idea of the necessary changes. I may tackle the issue
next week.
>> If someone has a lot of spare time on the weekend, feel free to go ahead.
>
> Also, are the patches Debian-specific? Given that they don't seem to be
> fixes but rather improve performance, I could imagine Stefan Kurtz
> (MUMmer upstream) would probably appreciate receiving these patches as well.
No, these are not Debian specific. However, the one commited patch
applies to a tool which comes from mugsy, not mummer. If we get the
other one cleaned up, we could drop Stefan a note.
Best,
Fabian
1,2d0
< #include <math.h>
< #include <string.h>
3a2,3
> #include <math.h>
>
17a18
>
21a23,26
>
>
>
>
23,31c28,46
< static void generateDelta(const Diagonal *Diag, long int FinishCt,
< long int FinishCDi, long int N,
< vector<long int> &Delta);
<
< static inline unsigned char maxScore(const Node &);
<
< static inline long int scoreMatch(const Diagonal Diag, long int Dct,
< long int CDi, const char *A, const char *B,
< long int N, unsigned int m_o);
---
> static void generateDelta
> (const Diagonal * Diag, long int FinishCt, long int FinishCDi,
> long int N, vector<long int> & Delta);
>
>
> static inline Score * maxScore
> (Score S[3]);
>
>
> static inline long int scoreMatch
> (const Diagonal Diag, long int Dct, long int CDi,
> const char * A, const char * B, long int N, unsigned int m_o);
>
>
> static inline void scoreEdit
> (Score & curr, const long int del, const long int ins, const long int mat);
>
>
>
33,34d47
< static inline Score scoreEdit(const long int del, const long int ins,
< const long int mat);
37c50,51
< bool _alignEngine(const char *A0, long int Astart, long int &Aend,
---
> bool _alignEngine
> (const char * A0, long int Astart, long int & Aend,
53,65d66
< /*--
< The code in this function looks bad, but its performance is 'ok', as
< far as I can tell. I tried to replace it with the Gotoh() implementation
< from seqan, but that made the program *much* slower. It is probably due
< to the break off defined by `_break_len` this cuts things short, if
< there is no reasonable expectation to reach a decent alignment.
<
< -- Fabian Klötzl, 2016-03-08
< */
<
< // most common modus operandi: Forward Align
<
<
108c109,110
< if (m_o & DIRECTION_BIT) {
---
> if ( m_o & DIRECTION_BIT )
> {
113c115,117
< } else {
---
> }
> else
> {
128,135c132,139
< Diag[0].I[0].values[DELETE] = min_score;
< Diag[0].I[0].values[INSERT] = min_score;
< Diag[0].I[0].values[MATCH] = 0;
< Diag[0].I[0].maxi = MATCH; // = Diag[0].I[0].S + MATCH;
<
< Diag[0].I[0].used[DELETE] = NONE;
< Diag[0].I[0].used[INSERT] = NONE;
< Diag[0].I[0].used[MATCH] = START;
---
> Diag[0] . I[0] . S[DELETE] . value = min_score;
> Diag[0] . I[0] . S[INSERT] . value = min_score;
> Diag[0] . I[0] . S[MATCH] . value = 0;
> Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH;
>
> Diag[0] . I[0] . S[DELETE] . used = NONE;
> Diag[0] . I[0] . S[INSERT] . used = NONE;
> Diag[0] . I[0] . S[MATCH] . used = START;
141,143c145,148
< for (Dct = 1;
< Dct <= N + M && (Dct - FinishCt) <= _break_len && lbound <= rbound;
< Dct++) {
---
> for ( Dct = 1; Dct <= N + M &&
> (Dct - FinishCt) <= _break_len &&
> lbound <= rbound; Dct++ )
> {
145c150,151
< if (Dct >= Ll) {
---
> if ( Dct >= Ll )
> {
147c153,154
< Diag = (Diagonal *)Safe_realloc(Diag, sizeof(Diagonal) * Ll);
---
> Diag = (Diagonal *) Safe_realloc
> ( Diag, sizeof(Diagonal) * Ll );
155c162,163
< Diag[Dct].I = (Node *)Safe_malloc(Ds * sizeof(Node));
---
> Diag[Dct] . I = (Node *) Safe_malloc
> ( Ds * sizeof(Node) );
161c169,170
< if (Ds > MaxL) MaxL = Ds;
---
> if ( Ds > MaxL )
> MaxL = Ds;
165c174,175
< if (Dct <= N) {
---
> if ( Dct <= N )
> {
168c178,180
< } else {
---
> }
> else
> {
182c194,195
< if (PPDct >= 0) {
---
> if ( PPDct >= 0 )
> {
186c199,200
< } else
---
> }
> else
190c204,205
< if (m_o & FORCED_BIT) high_score = min_score;
---
> if ( m_o & FORCED_BIT )
> high_score = min_score;
194c209,210
< for (CDi = lbound; CDi <= rbound; CDi++) {
---
> for ( CDi = lbound; CDi <= rbound; CDi ++ )
> {
198,200d213
< auto D_DPct = Diag[PDct];
< auto I_PDi = D_DPct.I[PDi];
<
202,217c215,233
< if (PDi >= 0 && PDi < PDs) {
< Score dummy = scoreEdit(
< I_PDi.used[DELETE] == NONE
< ? I_PDi.values[DELETE]
< : I_PDi.values[DELETE] + CONT_GAP_SCORE[_matrix_type],
< I_PDi.used[INSERT] == NONE
< ? I_PDi.values[INSERT]
< : I_PDi.values[INSERT] + OPEN_GAP_SCORE[_matrix_type],
< I_PDi.used[MATCH] == NONE
< ? I_PDi.values[MATCH]
< : I_PDi.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]);
< Diag[Dct].I[Di].values[DELETE] = dummy.value;
< Diag[Dct].I[Di].used[DELETE] = dummy.used;
< } else {
< Diag[Dct].I[Di].values[DELETE] = min_score;
< Diag[Dct].I[Di].used[DELETE] = NONE;
---
> if ( PDi >= 0 && PDi < PDs )
> scoreEdit
> (Diag[Dct] . I[Di] . S[DELETE],
> Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
> Diag[PDct] . I[PDi] . S[DELETE] . value :
> Diag[PDct] . I[PDi] . S[DELETE] . value +
> CONT_GAP_SCORE [_matrix_type],
> Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
> Diag[PDct] . I[PDi] . S[INSERT] . value :
> Diag[PDct] . I[PDi] . S[INSERT] . value +
> OPEN_GAP_SCORE [_matrix_type],
> Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
> Diag[PDct] . I[PDi] . S[MATCH] . value :
> Diag[PDct] . I[PDi] . S[MATCH] . value +
> OPEN_GAP_SCORE [_matrix_type]);
> else
> {
> Diag[Dct] . I[Di] . S[DELETE] . value = min_score;
> Diag[Dct] . I[Di] . S[DELETE] . used = NONE;
223,239c239,257
< if (PDi >= 0 && PDi < PDs) {
< auto parent = Diag[PDct].I[PDi];
< auto dummy = scoreEdit(
< parent.used[DELETE] == NONE
< ? parent.values[DELETE]
< : parent.values[DELETE] + OPEN_GAP_SCORE[_matrix_type],
< parent.used[INSERT] == NONE
< ? parent.values[INSERT]
< : parent.values[INSERT] + CONT_GAP_SCORE[_matrix_type],
< parent.used[MATCH] == NONE
< ? parent.values[MATCH]
< : parent.values[MATCH] + OPEN_GAP_SCORE[_matrix_type]);
< Diag[Dct].I[Di].values[INSERT] = dummy.value;
< Diag[Dct].I[Di].used[INSERT] = dummy.used;
< } else {
< Diag[Dct].I[Di].values[INSERT] = min_score;
< Diag[Dct].I[Di].used[INSERT] = NONE;
---
> if ( PDi >= 0 && PDi < PDs )
> scoreEdit
> (Diag[Dct] . I[Di] . S[INSERT],
> Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
> Diag[PDct] . I[PDi] . S[DELETE] . value :
> Diag[PDct] . I[PDi] . S[DELETE] . value +
> OPEN_GAP_SCORE [_matrix_type],
> Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
> Diag[PDct] . I[PDi] . S[INSERT] . value :
> Diag[PDct] . I[PDi] . S[INSERT] . value +
> CONT_GAP_SCORE [_matrix_type],
> Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
> Diag[PDct] . I[PDi] . S[MATCH] . value :
> Diag[PDct] . I[PDi] . S[MATCH] . value +
> OPEN_GAP_SCORE [_matrix_type]);
> else
> {
> Diag[Dct] . I[Di] . S[INSERT] . value = min_score;
> Diag[Dct] . I[Di] . S[INSERT] . used = NONE;
243,252c261,274
< if (PPDi >= 0 && PPDi < PPDs) {
< auto dummy = scoreEdit(Diag[PPDct].I[PPDi].values[DELETE],
< Diag[PPDct].I[PPDi].values[INSERT],
< Diag[PPDct].I[PPDi].values[MATCH]);
< Diag[Dct].I[Di].values[MATCH] =
< dummy.value + scoreMatch(Diag[Dct], Dct, CDi, A, B, N, m_o);
< Diag[Dct].I[Di].used[MATCH] = dummy.used;
< } else {
< Diag[Dct].I[Di].values[MATCH] = min_score;
< Diag[Dct].I[Di].used[MATCH] = NONE;
---
> if ( PPDi >= 0 && PPDi < PPDs )
> {
> scoreEdit
> (Diag[Dct] . I[Di] . S[MATCH],
> Diag[PPDct] . I[PPDi] . S[DELETE] . value,
> Diag[PPDct] . I[PPDi] . S[INSERT] . value,
> Diag[PPDct] . I[PPDi] . S[MATCH] . value);
> Diag[Dct] . I[Di] . S[MATCH] . value +=
> scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o);
> }
> else
> {
> Diag[Dct] . I[Di] . S[MATCH] . value = min_score;
> Diag[Dct] . I[Di] . S[MATCH] . used = NONE;
257c279
< Diag[Dct].I[Di].maxi = maxScore(Diag[Dct].I[Di]);
---
> Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S);
260,262c282,284
< auto herp = Diag[Dct].I[Di];
< if (herp.values[herp.maxi] >= high_score) {
< high_score = herp.values[herp.maxi];
---
> if ( Diag[Dct] . I[Di] . max->value >= high_score )
> {
> high_score = Diag[Dct] . I[Di] . max->value;
268a291
>
270,275c293,301
< if (m_o & SEQEND_BIT && Dct >= L) {
< if (L == N) {
< if (lbound == 0) {
< auto maxi = Diag[Dct].I[0].maxi;
< if (Diag[Dct].I[0].values[maxi] >= xhigh_score) {
< xhigh_score = Diag[Dct].I[0].values[maxi];
---
> if ( m_o & SEQEND_BIT && Dct >= L )
> {
> if ( L == N )
> {
> if ( lbound == 0 )
> {
> if ( Diag[Dct] . I[0] . max->value >= xhigh_score )
> {
> xhigh_score = Diag[Dct] . I[0] . max->value;
280c306,307
< } else // L == M
---
> }
> else // L == M
282,286c309,315
< if (rbound == M) {
< auto derp = Diag[Dct].I[M - Diag[Dct].lbound];
< auto maxi = derp.maxi;
< if (derp.values[maxi] >= xhigh_score) {
< xhigh_score = derp.values[maxi];
---
> if ( rbound == M )
> {
> if ( Diag[Dct] . I[M-Diag[Dct].lbound] .
> max->value >= xhigh_score )
> {
> xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] .
> max->value;
294,296d322
< //-- If in extender modus operandi, free soon to be greatgrandparent
< // diag
< if (m_o & SEARCH_BIT && Dct > 1) free(Diag[PPDct].I);
298c324,328
< auto current_diagonal = Diag[Dct];
---
> //-- If in extender modus operandi, free soon to be greatgrandparent diag
> if ( m_o & SEARCH_BIT && Dct > 1 )
> free ( Diag[PPDct] . I );
>
>
300,303c330,332
< for (Di = 0; Di < Ds; Di++) {
< auto current_node = current_diagonal.I[Di];
< auto maxi = current_node.maxi;
< if (high_score - current_node.values[maxi] > max_diff)
---
> for ( Di = 0; Di < Ds; Di ++ )
> {
> if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
308,311c337,339
< for (Di = Ds - 1; Di >= 0; Di--) {
< auto current_node = current_diagonal.I[Di];
< auto maxi = current_node.maxi;
< if (high_score - current_node.values[maxi] > max_diff)
---
> for ( Di = Ds - 1; Di >= 0; Di -- )
> {
> if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
318,332c346,353
< if (Dct < N && Dct < M) {
< Dl++;
< rbound++;
< Dmid = (Dct + 1) / 2.0;
< } else if (Dct >= N && Dct >= M) {
< Dl--;
< lbound--;
< Dmid = N - (Dct + 1) / 2.0;
< } else if (Dct >= N) {
< lbound--;
< Dmid = N - (Dct + 1) / 2.0;
< } else {
< rbound++;
< Dmid = (Dct + 1) / 2.0;
< }
---
> if ( Dct < N && Dct < M )
> { Dl ++; rbound ++; Dmid = (Dct+1)/2.0; }
> else if ( Dct >= N && Dct >= M )
> { Dl --; lbound --; Dmid = N - (Dct+1)/2.0; }
> else if ( Dct >= N )
> { lbound --; Dmid = N - (Dct+1)/2.0; }
> else
> { rbound ++; Dmid = (Dct+1)/2.0; }
335c356,357
< if (Dband > 0) {
---
> if ( Dband > 0 )
> {
337c359,360
< if (lbound < tlb) lbound = tlb;
---
> if ( lbound < tlb )
> lbound = tlb;
339c362,363
< if (rbound > trb) rbound = trb;
---
> if ( rbound > trb )
> rbound = trb;
342,343c366,369
< if (lbound < 0) lbound = 0;
< if (rbound >= Dl) rbound = Dl - 1;
---
> if ( lbound < 0 )
> lbound = 0;
> if ( rbound >= Dl )
> rbound = Dl - 1;
351,352c377,380
< if (Dct == N + M) {
< if (~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT) {
---
> if ( Dct == N + M )
> {
> if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT )
> {
356c384,385
< } else if (FinishCt == Dct)
---
> }
> else if ( FinishCt == Dct )
358c387,389
< } else if (m_o & SEQEND_BIT && xFinishCt != 0) {
---
> }
> else if ( m_o & SEQEND_BIT && xFinishCt != 0 )
> {
365,369c396,399
< long int Aadj =
< FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
< long int Badj =
< FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
< if (~m_o & DIRECTION_BIT) {
---
> long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
> long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
> if ( ~m_o & DIRECTION_BIT )
> {
386,387c416
< fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt,
< TrimCt);
---
> fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt);
390,391c419
< (long int)sizeof(Diagonal) * Dct +
< (long int)sizeof(Node) * CalcCt);
---
> (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt);
394,395c422
< ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) *
< 2);
---
> ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2);
399c426,427
< if (~m_o & SEARCH_BIT) generateDelta(Diag, FinishCt, FinishCDi, N, Delta);
---
> if ( ~m_o & SEARCH_BIT )
> generateDelta (Diag, FinishCt, FinishCDi, N, Delta);
409,411c437,442
< static void generateDelta(const Diagonal *Diag, long int FinishCt,
< long int FinishCDi, long int N,
< vector<long int> &Delta)
---
>
>
>
> static void generateDelta
> (const Diagonal * Diag, long int FinishCt, long int FinishCDi,
> long int N, vector<long int> & Delta)
446c477
< edit = Diag[Dct].I[Di].maxi;
---
> edit = Diag[Dct] . I[Di] . max - Diag[Dct] . I[Di] . S;
449c480,481
< while (Dct >= 0) {
---
> while ( Dct >= 0 )
> {
451c483,484
< if (Pi >= PSize) {
---
> if ( Pi >= PSize )
> {
453,454c486,487
< Reverse_Path =
< (char *)Safe_realloc(Reverse_Path, sizeof(char) * PSize);
---
> Reverse_Path = (char *) Safe_realloc
> ( Reverse_Path, sizeof(char) * PSize );
458,459c491
< curr_score = Score{Diag[Dct].I[Di].values[edit],
< Diag[Dct].I[Di].used[edit]}; // FIXME
---
> curr_score = Diag[Dct] . I[Di] . S[edit];
462,464c494,501
< switch (edit) {
< case DELETE: CDi = Dct-- <= N ? CDi - 1 : CDi; break;
< case INSERT: CDi = Dct-- <= N ? CDi : CDi + 1; break;
---
> switch ( edit )
> {
> case DELETE :
> CDi = Dct -- <= N ? CDi - 1 : CDi;
> break;
> case INSERT :
> CDi = Dct -- <= N ? CDi : CDi + 1;
> break;
469c506,508
< case START: Dct = -1; break;
---
> case START :
> Dct = -1;
> break;
481,482c520,523
< for (Pi -= 2; Pi >= 0; Pi--) {
< switch (Reverse_Path[Pi]) {
---
> for (Pi -= 2; Pi >= 0; Pi --)
> {
> switch ( Reverse_Path[Pi] )
> {
491,492c532,536
< case MATCH: Count++; break;
< case START: break;
---
> case MATCH :
> Count ++;
> break;
> case START :
> break;
505c549,553
< static inline unsigned char maxScore(const Node &n)
---
>
>
>
> static inline Score * maxScore
> (Score S[3])
510,512c558,561
< if (n.values[DELETE] > n.values[INSERT]) {
< if (n.values[DELETE] > n.values[MATCH])
< return DELETE;
---
> if ( S[DELETE] . value > S[INSERT] . value )
> {
> if ( S[DELETE] . value > S[MATCH] . value )
> return S + DELETE;
514,516c563,566
< return MATCH;
< } else if (n.values[INSERT] > n.values[MATCH])
< return INSERT;
---
> return S + MATCH;
> }
> else if ( S[INSERT] . value > S[MATCH] . value )
> return S + INSERT;
518c568
< return MATCH;
---
> return S + MATCH;
521,522c571,575
< static inline Score scoreEdit(const long int del, const long int ins,
< const long int mat)
---
>
>
>
> static inline void scoreEdit
> (Score & curr, const long int del, const long int ins, const long int mat)
527,529c580,583
< Score curr = {0, 0};
< if (del > ins) {
< if (del > mat) {
---
> if ( del > ins )
> {
> if ( del > mat )
> {
532c586,588
< } else {
---
> }
> else
> {
536c592,594
< } else if (ins > mat) {
---
> }
> else if ( ins > mat )
> {
539c597,599
< } else {
---
> }
> else
> {
543c603,604
< return curr;
---
>
> return;
546,548c607,612
< static inline long int scoreMatch(const Diagonal Diag, long int Dct,
< long int CDi, const char *A, const char *B,
< long int N, unsigned int m_o)
---
>
>
>
> static inline long int scoreMatch
> (const Diagonal Diag, long int Dct, long int CDi,
> const char * A, const char * B, long int N, unsigned int m_o)
566c630,631
< if (Dct <= N) {
---
> if ( Dct <= N )
> {
569c634,636
< } else {
---
> }
> else
> {
574,575c641,644
< if (!isalpha(Ac)) Ac = STOP_CHAR;
< if (!isalpha(Bc)) Bc = STOP_CHAR;
---
> if ( ! isalpha(Ac) )
> Ac = STOP_CHAR;
> if ( ! isalpha(Bc) )
> Bc = STOP_CHAR;
Reply to: