fix for #693463, part of the zdiff man page edits are no longer needed
[debian/gzip] / lib / match.c
index 66cb1ca124b1447ae8014060bdfa92e620673f5a..ac32ed0f6e34fa5eb21bc020a94c0f370cd47f24 100644 (file)
@@ -1,11 +1,11 @@
 /* match.s -- optional optimized asm version of longest match in deflate.c
 
 /* match.s -- optional optimized asm version of longest match in deflate.c
 
-   Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2006, 2009-2012 Free Software Foundation, Inc.
    Copyright (C) 1992-1993 Jean-loup Gailly
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    Copyright (C) 1992-1993 Jean-loup Gailly
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
+   the Free Software Foundation; either version 3, or (at your option)
    any later version.
 
    This program is distributed in the hope that it will be useful,
    any later version.
 
    This program is distributed in the hope that it will be useful,
@@ -28,8 +28,6 @@
  * David Mosberger-Tang <davidm@hpl.hp.com>.
  */
 
  * David Mosberger-Tang <davidm@hpl.hp.com>.
  */
 
-/* $Id: match.c,v 1.1 2006/11/20 08:40:34 eggert Exp $ */
-
 /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
  * external symbols with an underline character '_'.
  */
 /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
  * external symbols with an underline character '_'.
  */
@@ -61,7 +59,7 @@
  * This version assumes static allocation of the arrays (-DDYN_ALLOC not used).
  */
 
  * This version assumes static allocation of the arrays (-DDYN_ALLOC not used).
  */
 
-       .file   "match.S"
+        .file   "match.S"
 
 #define MAX_MATCH      258
 #define MAX_MATCH2     $128 /* MAX_MATCH/2-1 */
 
 #define MAX_MATCH      258
 #define MAX_MATCH2     $128 /* MAX_MATCH/2-1 */
 #define    WSIZE       $32768
 #define MAX_DIST       WSIZE - MAX_MATCH - MIN_MATCH - 1
 
 #define    WSIZE       $32768
 #define MAX_DIST       WSIZE - MAX_MATCH - MIN_MATCH - 1
 
-       .globl  _match_init
-       .globl  _longest_match
+        .globl _match_init
+        .globl  _longest_match
 
 
-       .text
+        .text
 
 _match_init:
 
 _match_init:
-       ret
+        ret
 
 /*-----------------------------------------------------------------------
  * Set match_start to the longest match starting at the given string and
 
 /*-----------------------------------------------------------------------
  * Set match_start to the longest match starting at the given string and
@@ -92,8 +90,8 @@ _longest_match:       /* int longest_match(cur_match) */
      /* return address */               /* esp+16 */
         push    %ebp                    /* esp+12 */
         push    %edi                    /* esp+8  */
      /* return address */               /* esp+16 */
         push    %ebp                    /* esp+12 */
         push    %edi                    /* esp+8  */
-       push    %esi                    /* esp+4  */
-       push    %ebx                    /* esp    */
+        push   %esi                    /* esp+4  */
+        push    %ebx                   /* esp    */
 
 /*
  *      match        equ esi
 
 /*
  *      match        equ esi
@@ -102,21 +100,21 @@ _longest_match:   /* int longest_match(cur_match) */
  *      best_len     equ ebx
  *      limit        equ edx
  */
  *      best_len     equ ebx
  *      limit        equ edx
  */
-       mov     cur_match,%esi
+        mov    cur_match,%esi
         mov    _max_chain_length,%ebp /* chain_length = max_chain_length */
         mov    _max_chain_length,%ebp /* chain_length = max_chain_length */
-       mov     _strstart,%edi
-       mov     %edi,%edx
-       sub     MAX_DIST,%edx          /* limit = strstart-MAX_DIST */
-       jae     limit_ok
-       sub     %edx,%edx              /* limit = NIL */
+        mov    _strstart,%edi
+        mov    %edi,%edx
+        sub    MAX_DIST,%edx          /* limit = strstart-MAX_DIST */
+        jae    limit_ok
+        sub    %edx,%edx              /* limit = NIL */
 limit_ok:
         add    $2+_window,%edi        /* edi = offset(window+strstart+2) */
         mov    _prev_length,%ebx      /* best_len = prev_length */
         movw   -3(%ebx,%edi),%ax      /* ax = scan[best_len-1..best_len] */
         movw   -2(%edi),%cx           /* cx = scan[0..1] */
 limit_ok:
         add    $2+_window,%edi        /* edi = offset(window+strstart+2) */
         mov    _prev_length,%ebx      /* best_len = prev_length */
         movw   -3(%ebx,%edi),%ax      /* ax = scan[best_len-1..best_len] */
         movw   -2(%edi),%cx           /* cx = scan[0..1] */
-       cmp     _good_match,%ebx       /* do we have a good match already? */
+        cmp    _good_match,%ebx       /* do we have a good match already? */
         jb      do_scan
         jb      do_scan
-       shr     $2,%ebp                /* chain_length >>= 2 */
+        shr    $2,%ebp                /* chain_length >>= 2 */
         jmp     do_scan
 
         .align  4
         jmp     do_scan
 
         .align  4
@@ -152,8 +150,8 @@ mismatch:
         subb    -2(%esi),%cl        /* cl = 0 if first bytes equal */
         xchg    %edi,%eax           /* edi = scan+2, eax = end of scan */
         sub     %edi,%eax           /* eax = len */
         subb    -2(%esi),%cl        /* cl = 0 if first bytes equal */
         xchg    %edi,%eax           /* edi = scan+2, eax = end of scan */
         sub     %edi,%eax           /* eax = len */
-       sub     %eax,%esi           /* esi = cur_match + 2 + offset(window) */
-       sub     $2+_window,%esi     /* esi = cur_match */
+        sub    %eax,%esi           /* esi = cur_match + 2 + offset(window) */
+        sub    $2+_window,%esi     /* esi = cur_match */
         subb    $1,%cl              /* set carry if cl == 0 (cannot use DEC) */
         adc     $0,%eax             /* eax = carry ? len+1 : len */
         cmp     %ebx,%eax           /* len > best_len ? */
         subb    $1,%cl              /* set carry if cl == 0 (cannot use DEC) */
         adc     $0,%eax             /* eax = carry ? len+1 : len */
         cmp     %ebx,%eax           /* len > best_len ? */
@@ -164,7 +162,7 @@ mismatch:
         jl      long_loop
 the_end:
         mov     %ebx,%eax               /* result = eax = best_len */
         jl      long_loop
 the_end:
         mov     %ebx,%eax               /* result = eax = best_len */
-       pop     %ebx
+        pop     %ebx
         pop     %esi
         pop     %edi
         pop     %ebp
         pop     %esi
         pop     %edi
         pop     %ebp
@@ -276,15 +274,15 @@ maxmatch:
 #define WSIZE          32768
 #define MAX_DIST       (WSIZE - MAX_MATCH - MIN_MATCH - 1)
 
 #define WSIZE          32768
 #define MAX_DIST       (WSIZE - MAX_MATCH - MIN_MATCH - 1)
 
-       GLOBAL  (_match_init)
-       GLOBAL  (_longest_match)
+        GLOBAL (_match_init)
+        GLOBAL (_longest_match)
 
 
-       TEXT
+        TEXT
 
 
-       FILE    ("match.S")
+        FILE   ("match.S")
 
 _match_init:
 
 _match_init:
-       rts
+        rts
 
 /*-----------------------------------------------------------------------
  * Set match_start to the longest match starting at the given string and
 
 /*-----------------------------------------------------------------------
  * Set match_start to the longest match starting at the given string and
@@ -306,91 +304,91 @@ _match_init:
 #endif
 
 _longest_match:
 #endif
 
 _longest_match:
-       movel   IndBase(4,Stack_Pointer),Cur_Match
-       moveml  imm(pushreg),predec(Stack_Pointer)
-       movel   _max_chain_length,Chain_Length
-       movel   _prev_length,Best_Len
-       movel   imm(_prev),Prev_Address
-       movel   imm(_window+MIN_MATCH),Match_Ini
-       movel   _strstart,Limit
-       movel   Match_Ini,Scan_Ini
-       addl    Limit,Scan_Ini
-       subw    imm(MAX_DIST),Limit
-       bhis    L__limit_ok
-       clrl    Limit
+        movel  IndBase(4,Stack_Pointer),Cur_Match
+        moveml imm(pushreg),predec(Stack_Pointer)
+        movel  _max_chain_length,Chain_Length
+        movel  _prev_length,Best_Len
+        movel  imm(_prev),Prev_Address
+        movel  imm(_window+MIN_MATCH),Match_Ini
+        movel  _strstart,Limit
+        movel  Match_Ini,Scan_Ini
+        addl   Limit,Scan_Ini
+        subw   imm(MAX_DIST),Limit
+        bhis   L__limit_ok
+        clrl   Limit
 L__limit_ok:
 L__limit_ok:
-       cmpl    invert_maybe(_good_match,Best_Len)
-       blos    L__length_ok
-       lsrl    imm(2),Chain_Length
+        cmpl   invert_maybe(_good_match,Best_Len)
+        blos   L__length_ok
+        lsrl   imm(2),Chain_Length
 L__length_ok:
 L__length_ok:
-       subql   imm(1),Chain_Length
+        subql  imm(1),Chain_Length
 #ifdef UNALIGNED_OK
 #ifdef UNALIGNED_OK
-       movew   IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
-       movew   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
+        movew  IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
+        movew  IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
 #else
 #else
-       moveb   IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
-       lslw    imm(8),Scan_Start
-       moveb   IndBase(-MIN_MATCH+1,Scan_Ini),Scan_Start
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-       lslw    imm(8),Scan_End
-       moveb   IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
+        moveb  IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
+        lslw   imm(8),Scan_Start
+        moveb  IndBase(-MIN_MATCH+1,Scan_Ini),Scan_Start
+        moveb  IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
+        lslw   imm(8),Scan_End
+        moveb  IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
 #endif
 #endif
-       bras    L__do_scan
+        bras   L__do_scan
 
 L__long_loop:
 #ifdef UNALIGNED_OK
 
 L__long_loop:
 #ifdef UNALIGNED_OK
-       movew   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
+        movew  IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
 #else
 #else
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-       lslw    imm(8),Scan_End
-       moveb   IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
+        moveb  IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
+        lslw   imm(8),Scan_End
+        moveb  IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
 #endif
 
 L__short_loop:
 #endif
 
 L__short_loop:
-       lslw    imm(1),Cur_Match
-       movew   IndBaseNdxl(0,Prev_Address,Cur_Match),Cur_Match
-       cmpw    invert_maybe(Limit,Cur_Match)
-       dbls    Chain_Length,L__do_scan
-       bras    L__return
+        lslw   imm(1),Cur_Match
+        movew  IndBaseNdxl(0,Prev_Address,Cur_Match),Cur_Match
+        cmpw   invert_maybe(Limit,Cur_Match)
+        dbls   Chain_Length,L__do_scan
+        bras   L__return
 
 L__do_scan:
 
 L__do_scan:
-       movel   Match_Ini,Match
-       addl    Cur_Match,Match
+        movel  Match_Ini,Match
+        addl   Cur_Match,Match
 #ifdef UNALIGNED_OK
 #ifdef UNALIGNED_OK
-       cmpw    invert_maybe(IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_End)
-       bne     L__short_loop
-       cmpw    invert_maybe(IndBase(-MIN_MATCH,Match),Scan_Start)
-       bne     L__short_loop
+        cmpw   invert_maybe(IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_End)
+        bne    L__short_loop
+        cmpw   invert_maybe(IndBase(-MIN_MATCH,Match),Scan_Start)
+        bne    L__short_loop
 #else
 #else
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_Test
-       lslw    imm(8),Scan_Test
-       moveb   IndBaseNdxw(-MIN_MATCH,Match,Best_Len),Scan_Test
-       cmpw    invert_maybe(Scan_Test,Scan_End)
-       bne     L__short_loop
-       moveb   IndBase(-MIN_MATCH,Match),Scan_Test
-       lslw    imm(8),Scan_Test
-       moveb   IndBase(-MIN_MATCH+1,Match),Scan_Test
-       cmpw    invert_maybe(Scan_Test,Scan_Start)
-       bne     L__short_loop
+        moveb  IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_Test
+        lslw   imm(8),Scan_Test
+        moveb  IndBaseNdxw(-MIN_MATCH,Match,Best_Len),Scan_Test
+        cmpw   invert_maybe(Scan_Test,Scan_End)
+        bne    L__short_loop
+        moveb  IndBase(-MIN_MATCH,Match),Scan_Test
+        lslw   imm(8),Scan_Test
+        moveb  IndBase(-MIN_MATCH+1,Match),Scan_Test
+        cmpw   invert_maybe(Scan_Test,Scan_Start)
+        bne    L__short_loop
 #endif
 
 #endif
 
-       movew   imm((MAX_MATCH-MIN_MATCH+1)-1),Loop_Counter
-       movel   Scan_Ini,Scan
+        movew  imm((MAX_MATCH-MIN_MATCH+1)-1),Loop_Counter
+        movel  Scan_Ini,Scan
 L__scan_loop:
 L__scan_loop:
-       cmpmb   postinc(Match),postinc(Scan)
-       dbne    Loop_Counter,L__scan_loop
-
-       subl    Scan_Ini,Scan
-       addql   imm(MIN_MATCH-1),Scan
-       cmpl    invert_maybe(Best_Len,Scan)
-       bls     L__short_loop
-       movel   Scan,Best_Len
-       movel   Cur_Match,_match_start
-       cmpl    invert_maybe(_nice_match,Best_Len)
-       blos    L__long_loop
+        cmpmb  postinc(Match),postinc(Scan)
+        dbne   Loop_Counter,L__scan_loop
+
+        subl   Scan_Ini,Scan
+        addql  imm(MIN_MATCH-1),Scan
+        cmpl   invert_maybe(Best_Len,Scan)
+        bls    L__short_loop
+        movel  Scan,Best_Len
+        movel  Cur_Match,_match_start
+        cmpl   invert_maybe(_nice_match,Best_Len)
+        blos   L__long_loop
 L__return:
 L__return:
-       moveml  postinc(Stack_Pointer),imm(popreg)
-       rts
+        moveml postinc(Stack_Pointer),imm(popreg)
+        rts
 
 #else
 
 
 #else
 
@@ -495,188 +493,188 @@ L__return:
 #define cgeu                   cmp.ge.unc
 #define cneu                   cmp.ne.unc
 
 #define cgeu                   cmp.ge.unc
 #define cneu                   cmp.ne.unc
 
-       .global longest_match
-       .proc longest_match
-       .align 32
+        .global longest_match
+        .proc longest_match
+        .align 32
 longest_match:
 // --  Cycle: 0
 longest_match:
 // --  Cycle: 0
-       .prologue
+        .prologue
 {.mmi
 {.mmi
-        alloc r2=ar.pfs,R_INPUT,R_LOCAL,R_OUTPUT,R_ROTATING
-       .rotr scan[MLAT+2], match[MLAT+2], shscan0[SHLAT+1], \
-             shscan1[SHLAT+1], shmatch0[SHLAT+1], shmatch1[SHLAT+1]
-       .rotp lc[MLAT+SHLAT+2]
-       mova s_vspec_cmatch=in0 // cur_match from input register
-       add s_tm1=@gprel(strstart),gp // a(a(strstart))
+         alloc r2=ar.pfs,R_INPUT,R_LOCAL,R_OUTPUT,R_ROTATING
+        .rotr scan[MLAT+2], match[MLAT+2], shscan0[SHLAT+1], \
+              shscan1[SHLAT+1], shmatch0[SHLAT+1], shmatch1[SHLAT+1]
+        .rotp lc[MLAT+SHLAT+2]
+        mova s_vspec_cmatch=in0 // cur_match from input register
+        add s_tm1=@gprel(strstart),gp // a(a(strstart))
 }{.mmi
 }{.mmi
-       add s_tm3=@gprel(prev_length),gp // a(a(prev_length))
-       add s_tm5=@ltoff(window),gp // a(a(window))
-       add s_tm6=@ltoff(prev),gp // a(a(prev))
-       ;;
+        add s_tm3=@gprel(prev_length),gp // a(a(prev_length))
+        add s_tm5=@ltoff(window),gp // a(a(window))
+        add s_tm6=@ltoff(prev),gp // a(a(prev))
+        ;;
 }{.mmb //  Cycle: 1
 }{.mmb //  Cycle: 1
-       ld4 s_vstrstart=[s_tm1] // strstart
-       ld4 s_vbestlen=[s_tm3] // best_len = prev_length
-       brp.loop.imp .cmploop,.cmploop+48
+        ld4 s_vstrstart=[s_tm1] // strstart
+        ld4 s_vbestlen=[s_tm3] // best_len = prev_length
+        brp.loop.imp .cmploop,.cmploop+48
 }{.mli
 }{.mli
-       add s_tm2=@gprel(max_chain_length),gp // a(a(max_chain_length))
-       movl s_wmask=WSIZE-1
-       ;;
+        add s_tm2=@gprel(max_chain_length),gp // a(a(max_chain_length))
+        movl s_wmask=WSIZE-1
+        ;;
 }{.mmi //  Cycle: 2
 }{.mmi //  Cycle: 2
-       ld8 s_aprev=[s_tm6] // a(prev)
-       ld8 s_awindow=[s_tm5] // a(window)
-       .save pr, s_prsave
-       movi0 s_prsave=pr // save predicates
+        ld8 s_aprev=[s_tm6] // a(prev)
+        ld8 s_awindow=[s_tm5] // a(window)
+        .save pr, s_prsave
+        movi0 s_prsave=pr // save predicates
 }{.mmi
 }{.mmi
-       add s_tm4=@gprel(good_match),gp // a(a(good_match))
-       add s_tm7=@ltoff(nice_match),gp // a(a(nice_match))
-       add s_tm8=@ltoff(match_start),gp // a(match_start)
-       ;;
+        add s_tm4=@gprel(good_match),gp // a(a(good_match))
+        add s_tm7=@ltoff(nice_match),gp // a(a(nice_match))
+        add s_tm8=@ltoff(match_start),gp // a(match_start)
+        ;;
 }{.mmi //  Cycle: 3
 }{.mmi //  Cycle: 3
-       ld8 s_anicematch=[s_tm7] // a(nice_match)
-       ld8 s_amatchstart=[s_tm8] // a(match_start)
-       .save ar.lc, s_lcsave
-       movi0 s_lcsave=ar.lc // save loop count register
+        ld8 s_anicematch=[s_tm7] // a(nice_match)
+        ld8 s_amatchstart=[s_tm8] // a(match_start)
+        .save ar.lc, s_lcsave
+        movi0 s_lcsave=ar.lc // save loop count register
 }{.mmi
 }{.mmi
-       .body
-       add s_tm1=-(MAX_MATCH + MIN_MATCH),s_wmask // maxdist
-       cmp.eq p_ll,p0=r0,r0 // parallel compare initialized as 'true'
-       mova s_vcurmatch=s_vspec_cmatch
-       ;;
+        .body
+        add s_tm1=-(MAX_MATCH + MIN_MATCH),s_wmask // maxdist
+        cmp.eq p_ll,p0=r0,r0 // parallel compare initialized as 'true'
+        mova s_vcurmatch=s_vspec_cmatch
+        ;;
 }{.mmi //  Cycle: 4
 }{.mmi //  Cycle: 4
-       ld4 s_vchainlen=[s_tm2] // chain_length=max_chain_length
-       ld4 s_tm4=[s_tm4] // v(good_match)
-       add s_ascan=s_awindow,s_vstrstart // scan=window + strstart
+        ld4 s_vchainlen=[s_tm2] // chain_length=max_chain_length
+        ld4 s_tm4=[s_tm4] // v(good_match)
+        add s_ascan=s_awindow,s_vstrstart // scan=window + strstart
 }{.mmi
 }{.mmi
-       sub s_vlimit=s_vstrstart, s_tm1 // limit=strstart - MAX_DIST
-       add s_amatch=s_awindow,s_vspec_cmatch // match=window + cur_match
-       and s_vspec_cmatch =s_vspec_cmatch,s_wmask
-       ;;
+        sub s_vlimit=s_vstrstart, s_tm1 // limit=strstart - MAX_DIST
+        add s_amatch=s_awindow,s_vspec_cmatch // match=window + cur_match
+        and s_vspec_cmatch =s_vspec_cmatch,s_wmask
+        ;;
 }{.mmi //  Cycle: 5
 }{.mmi //  Cycle: 5
-       add s_amatblen=s_amatch,s_vbestlen //
-       cneu p_bn2,p0=2,s_vbestlen // set if bestlen != 2
-       add s_ascanend=s_ascan,s_vbestlen // compute a(scan) + best_len
+        add s_amatblen=s_amatch,s_vbestlen //
+        cneu p_bn2,p0=2,s_vbestlen // set if bestlen != 2
+        add s_ascanend=s_ascan,s_vbestlen // compute a(scan) + best_len
 }{.mmi
 }{.mmi
-       ld1 s_vscan0=[s_ascan],1 // NB: s_ascan++
-       ld1 s_vmatch0=[s_amatch],1
-       cgtu p0,p_no=s_vlimit,r0 // is result positive ?
-       ;;
+        ld1 s_vscan0=[s_ascan],1 // NB: s_ascan++
+        ld1 s_vmatch0=[s_amatch],1
+        cgtu p0,p_no=s_vlimit,r0 // is result positive ?
+        ;;
 }{.mmi //  Cycle: 6
 }{.mmi //  Cycle: 6
-       ld1.nt1 s_vscan1=[s_ascan],2 // NB: s_ascan+3 in total
-       ld1.nt1 s_vmatch1=[s_amatch],2
-       add s_awinbest=s_awindow,s_vbestlen //
-       ;;
+        ld1.nt1 s_vscan1=[s_ascan],2 // NB: s_ascan+3 in total
+        ld1.nt1 s_vmatch1=[s_amatch],2
+        add s_awinbest=s_awindow,s_vbestlen //
+        ;;
 }{.mmi //  Cycle: 7
 }{.mmi //  Cycle: 7
-       ld1.nt1 s_vscanend=[s_ascanend],-1 // scan_end=scan[best_len]
-       ld1.nt1 s_vmatbst=[s_amatblen],-1
+        ld1.nt1 s_vscanend=[s_ascanend],-1 // scan_end=scan[best_len]
+        ld1.nt1 s_vmatbst=[s_amatblen],-1
 (p_no) mova s_vlimit=r0
 (p_no) mova s_vlimit=r0
-       ;;
+        ;;
 }{.mmi //  Cycle: 8
 (p_bn2)        ld1.nt1 s_vscanend1=[s_ascanend],1 // scan_end1=scan[best_len-1]
 (p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]
 }{.mmi //  Cycle: 8
 (p_bn2)        ld1.nt1 s_vscanend1=[s_ascanend],1 // scan_end1=scan[best_len-1]
 (p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]
-       shladd s_vspec_cmatch =s_vspec_cmatch,1,s_aprev
+        shladd s_vspec_cmatch =s_vspec_cmatch,1,s_aprev
 }{.mmi
 }{.mmi
-       cgeu p_shf,p0=s_vbestlen,s_tm4 // is (prev_length >= good_match) ?
-       ;;
+        cgeu p_shf,p0=s_vbestlen,s_tm4 // is (prev_length >= good_match) ?
+        ;;
 }{.mmi //  Cycle: 9
 }{.mmi //  Cycle: 9
-       ld1.nt1 s_vscan3=[s_ascan]
-       ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]
-       mova    s_vlen=3
+        ld1.nt1 s_vscan3=[s_ascan]
+        ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]
+        mova   s_vlen=3
 }{.mmi
 (p_shf)        shr.u s_vchainlen=s_vchainlen,2 // (cur_len) >> 2
 }{.mmi
 (p_shf)        shr.u s_vchainlen=s_vchainlen,2 // (cur_len) >> 2
-       ;;
+        ;;
 }{.mmi //  Cycle: 10
 }{.mmi //  Cycle: 10
-       ld1.nt1 s_vmatch3=[s_amatch]
-       // p_ll switched on as soon as we get a mismatch:
-       cmp.eq.and p_ll,p0=s_vmatch0,s_vscan0
-       cmp.eq.and p_ll,p0=s_vmatbst,s_vscanend
+        ld1.nt1 s_vmatch3=[s_amatch]
+        // p_ll switched on as soon as we get a mismatch:
+        cmp.eq.and p_ll,p0=s_vmatch0,s_vscan0
+        cmp.eq.and p_ll,p0=s_vmatbst,s_vscanend
 }{.mib
 }{.mib
-       cmp.eq.and p_ll,p0=s_vmatch1,s_vscan1
+        cmp.eq.and p_ll,p0=s_vmatch1,s_vscan1
 (p_bn2)        cmp.eq.and p_ll,p0=s_vmatbst1,s_vscanend1
 (p_ll) br.cond.dpnt.many .test_more
 (p_bn2)        cmp.eq.and p_ll,p0=s_vmatbst1,s_vscanend1
 (p_ll) br.cond.dpnt.many .test_more
-       ;;
+        ;;
 }
 
 .next_iter:
 {.mmi  // Cycle 0
 }
 
 .next_iter:
 {.mmi  // Cycle 0
-       add s_amatch=s_awindow,s_vspec_cmatch   // match=window + cur_match
-       mov s_vcurmatch=s_vspec_cmatch          // current value
-       add s_vchainlen=-1,s_vchainlen          // --chain_length
+        add s_amatch=s_awindow,s_vspec_cmatch          // match=window + cur_match
+        mov s_vcurmatch=s_vspec_cmatch         // current value
+        add s_vchainlen=-1,s_vchainlen                 // --chain_length
 }{.mib
 }{.mib
-       cmp.le.unc p_end,p0=s_vspec_cmatch,s_vlimit
-       and s_vspec_cmatch=s_vspec_cmatch,s_wmask
+        cmp.le.unc p_end,p0=s_vspec_cmatch,s_vlimit
+        and s_vspec_cmatch=s_vspec_cmatch,s_wmask
 (p_end)        br.cond.dptk.many .terminate
 (p_end)        br.cond.dptk.many .terminate
-       ;;
+        ;;
 }{.mmi // Cycle 1
 }{.mmi // Cycle 1
-       ld1 s_vmatch0=[s_amatch],1              // load match[0]
-       // compute prev[cur_match]:
-       shladd s_vspec_cmatch=s_vspec_cmatch,1,s_aprev
-       cmp.eq.unc p_end,p0=s_vchainlen,r0
+        ld1 s_vmatch0=[s_amatch],1             // load match[0]
+        // compute prev[cur_match]:
+        shladd s_vspec_cmatch=s_vspec_cmatch,1,s_aprev
+        cmp.eq.unc p_end,p0=s_vchainlen,r0
 } {.mib
 } {.mib
-       nop.m 0
-       add s_amatblen=s_awinbest,s_vcurmatch   // match=window + cur_match
+        nop.m 0
+        add s_amatblen=s_awinbest,s_vcurmatch  // match=window + cur_match
 (p_end)        br.cond.dptk.many .terminate
 (p_end)        br.cond.dptk.many .terminate
-       ;;
+        ;;
 }{.mmi // Cycle 2 (short)
 }{.mmi // Cycle 2 (short)
-       ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]         // get next cur_match
-       ;;
+        ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]                // get next cur_match
+        ;;
 }{.mmi // Cycle 3 (short)
 }{.mmi // Cycle 3 (short)
-       ld1.nt1 s_vmatbst=[s_amatblen],-1       // load match[best_len]
-       cmp.ne.unc p_ll,p0=r0,r0     // parallel compare initialized as 'false'
-       ;;
+        ld1.nt1 s_vmatbst=[s_amatblen],-1      // load match[best_len]
+        cmp.ne.unc p_ll,p0=r0,r0     // parallel compare initialized as 'false'
+        ;;
 }{.mmi // Cycle 4 (short)
 }{.mmi // Cycle 4 (short)
-       // load match[1] - - note: match += 3 (in total):
-       ld1.nt1 s_vmatch1=[s_amatch],2
-       ;;
-       // Cycle 5  (short)
+        // load match[1] - - note: match += 3 (in total):
+        ld1.nt1 s_vmatch1=[s_amatch],2
+        ;;
+        // Cycle 5  (short)
 (p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]         // load match[best_len-1]
 }{.mib // Here we (MOST LIKELY) pay a L2-fetch stall
 (p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]         // load match[best_len-1]
 }{.mib // Here we (MOST LIKELY) pay a L2-fetch stall
-       // p_ll switched on as soon as we get a mismatch:
-       cmp.ne.or p_ll,p0=s_vmatch0,s_vscan0
-       cmp.ne.or p_ll,p0=s_vmatbst,s_vscanend
+        // p_ll switched on as soon as we get a mismatch:
+        cmp.ne.or p_ll,p0=s_vmatch0,s_vscan0
+        cmp.ne.or p_ll,p0=s_vmatbst,s_vscanend
 (p_ll) br.cond.dptk.many .next_iter
 (p_ll) br.cond.dptk.many .next_iter
-       ;;
+        ;;
 }{.mmi // Cycle 6
 }{.mmi // Cycle 6
-       ld1.nt1 s_vmatch3=[s_amatch]
-       mova s_vlen=3
-       nop.i 0
+        ld1.nt1 s_vmatch3=[s_amatch]
+        mova s_vlen=3
+        nop.i 0
 }{.mib
 }{.mib
-       cmp.ne.or p_ll,p0=s_vmatch1,s_vscan1
+        cmp.ne.or p_ll,p0=s_vmatch1,s_vscan1
 (p_bn2)        cmp.ne.or p_ll,p0=s_vmatbst1,s_vscanend1
 (p_ll) br.cond.dptk.many .next_iter
 (p_bn2)        cmp.ne.or p_ll,p0=s_vmatbst1,s_vscanend1
 (p_ll) br.cond.dptk.many .next_iter
-       ;;
+        ;;
 }
 
 // We have passed the first hurdle - Are there additional matches ???
 
 .test_more:
 {.mmi  // Cycle 0
 }
 
 // We have passed the first hurdle - Are there additional matches ???
 
 .test_more:
 {.mmi  // Cycle 0
-       and s_tm3=7,s_ascan                     // get byte offset
-       and s_tm4=7,s_amatch                    // get byte offset
-       movi0 ar.ec=MLAT+SHLAT+2                // NB: One trip more than usual
+        and s_tm3=7,s_ascan                    // get byte offset
+        and s_tm4=7,s_amatch                   // get byte offset
+        movi0 ar.ec=MLAT+SHLAT+2               // NB: One trip more than usual
 }{.mib
 }{.mib
-       cmp.ne.unc p_no,p0=s_vscan3,s_vmatch3   // does not next one differ?
+        cmp.ne.unc p_no,p0=s_vscan3,s_vmatch3  // does not next one differ?
 (p_no)  br.cond.dptk.many .only3
 (p_no)  br.cond.dptk.many .only3
-       ;;
+        ;;
 }{.mmi // Cycle 1
 }{.mmi // Cycle 1
-       and s_tm1=-8,s_ascan    // get aligned address
-       shladd s_tm3=s_tm3,3,r0
-       movi0 ar.lc=31          // 32 times around the loop (8B at a time)
+        and s_tm1=-8,s_ascan   // get aligned address
+        shladd s_tm3=s_tm3,3,r0
+        movi0 ar.lc=31         // 32 times around the loop (8B at a time)
 }{.mib
 }{.mib
-       and s_tm2=-8,s_amatch                   // get aligned address
-       shladd s_tm4=s_tm4,3,r0
-       nop.b 0
-       ;;
+        and s_tm2=-8,s_amatch                  // get aligned address
+        shladd s_tm4=s_tm4,3,r0
+        nop.b 0
+        ;;
 }{.mmi // Cycle 2
 }{.mmi // Cycle 2
-       ld8.nt1 scan[1]=[s_tm1],8                       // load first chunk
-       sub s_tm5=64,s_tm3                              // 64 - amount
-       movi0 pr.rot=1<<16
+        ld8.nt1 scan[1]=[s_tm1],8                      // load first chunk
+        sub s_tm5=64,s_tm3                             // 64 - amount
+        movi0 pr.rot=1<<16
 }{.mmi
 }{.mmi
-       ld8.nt1 match[1]=[s_tm2],8      // load first chunk
-       sub s_tm6=64,s_tm4              // 64 - amount
-       add s_vlen=-8,s_vlen            // will be updated at least once
-       ;;
+        ld8.nt1 match[1]=[s_tm2],8     // load first chunk
+        sub s_tm6=64,s_tm4             // 64 - amount
+        add s_vlen=-8,s_vlen           // will be updated at least once
+        ;;
 }
 }
-       .align 32
+        .align 32
 .cmploop:
 {.mmi  // Cycle 0
 (lc[0])                        ld8 scan[0]=[s_tm1],8           // next scan chunk
 .cmploop:
 {.mmi  // Cycle 0
 (lc[0])                        ld8 scan[0]=[s_tm1],8           // next scan chunk
@@ -686,84 +684,84 @@ longest_match:
 (lc[MLAT+SHLAT+1])     cmp.ne.unc p_no,p0=s_tm7,s_tm8  // break search if !=
 (lc[MLAT])             first shmatch0[0]=match[MLAT+1],s_tm4
 (p_no)                 br.cond.dpnt.many .mismatch
 (lc[MLAT+SHLAT+1])     cmp.ne.unc p_no,p0=s_tm7,s_tm8  // break search if !=
 (lc[MLAT])             first shmatch0[0]=match[MLAT+1],s_tm4
 (p_no)                 br.cond.dpnt.many .mismatch
-                       ;;
+                        ;;
 }{.mii  // Cycle 1
 (lc[0])                        ld8 match[0]=[s_tm2],8
 }{.mii  // Cycle 1
 (lc[0])                        ld8 match[0]=[s_tm2],8
-                       // shift left(le) or right(be):
+                        // shift left(le) or right(be):
 (lc[MLAT])             second shscan1[0]=scan[MLAT],s_tm5
 (lc[MLAT])             second shmatch1[0]=match[MLAT],s_tm6
 }{.mmb
 (lc[MLAT+SHLAT])       or s_tm7=shscan0[SHLAT],shscan1[SHLAT]
 (lc[MLAT+SHLAT])       or s_tm8=shmatch0[SHLAT],shmatch1[SHLAT]
 (lc[MLAT])             second shscan1[0]=scan[MLAT],s_tm5
 (lc[MLAT])             second shmatch1[0]=match[MLAT],s_tm6
 }{.mmb
 (lc[MLAT+SHLAT])       or s_tm7=shscan0[SHLAT],shscan1[SHLAT]
 (lc[MLAT+SHLAT])       or s_tm8=shmatch0[SHLAT],shmatch1[SHLAT]
-                       br.ctop.dptk.many .cmploop
-                       ;;
+                        br.ctop.dptk.many .cmploop
+                        ;;
 }{.mfi
 }{.mfi
-       mov s_vlen=258
-       nop.f 0
+        mov s_vlen=258
+        nop.f 0
 }{.mfi
 }{.mfi
-       nop.f 0    // realign
-       ;;
+        nop.f 0    // realign
+        ;;
 }
 .mismatch:
 {.mii  // Cycle 0 (short)
 (p_no) pcmp1.eq s_tm2=s_tm7,s_tm8      // find first non-matching character
 }
 .mismatch:
 {.mii  // Cycle 0 (short)
 (p_no) pcmp1.eq s_tm2=s_tm7,s_tm8      // find first non-matching character
-       nop.i 0
-       ;;
-       // Cycle 1 (short)
+        nop.i 0
+        ;;
+        // Cycle 1 (short)
 (p_no) count s_tm1=s_tm2
 (p_no) count s_tm1=s_tm2
-       ;;
+        ;;
 }{.mib // Cycle 2 (short)
 (p_no) add s_vlen=s_vlen,s_tm1         // effective length
 }{.mib // Cycle 2 (short)
 (p_no) add s_vlen=s_vlen,s_tm1         // effective length
-       nop.i 0
-       clrrrb
-       ;;
+        nop.i 0
+        clrrrb
+        ;;
 }
 
 .only3:
 {.mib  // Cycle 0 (short)
 }
 
 .only3:
 {.mib  // Cycle 0 (short)
-       cmp.gt.unc p0,p_nbs=s_vlen,s_vbestlen           // (len > best_len) ?
+        cmp.gt.unc p0,p_nbs=s_vlen,s_vbestlen          // (len > best_len) ?
 (p_nbs)        br.cond.dpnt.many .next_iter                    // if not, re-iternate
 (p_nbs)        br.cond.dpnt.many .next_iter                    // if not, re-iternate
-       ;;
+        ;;
 }{.mmi // Cycle 1 (short)
 }{.mmi // Cycle 1 (short)
-       ld4 s_tm7=[s_anicematch]                        // nice_match
-       st4 [s_amatchstart]= s_vcurmatch
-       add s_ascanend=s_ascan,s_vlen                   // reset with best_len
-       ;;
+        ld4 s_tm7=[s_anicematch]                       // nice_match
+        st4 [s_amatchstart]= s_vcurmatch
+        add s_ascanend=s_ascan,s_vlen                  // reset with best_len
+        ;;
 }{.mmi // Cycle 2 (short)
 }{.mmi // Cycle 2 (short)
-       mova s_vbestlen=s_vlen
-       add s_ascanend=-3,s_ascanend            // remember extra offset
-       ;;
+        mova s_vbestlen=s_vlen
+        add s_ascanend=-3,s_ascanend           // remember extra offset
+        ;;
 }{.mmi // Cycle 3 (short)
 }{.mmi // Cycle 3 (short)
-       ld1 s_vscanend=[s_ascanend],-1          // scan_end=scan[best_len]
-       add s_awinbest=s_awindow,s_vbestlen     // update with new best_len
-       cmp.ne.unc p_bn2,p0=2,s_vbestlen        // set if bestlen != 2
-       ;;
+        ld1 s_vscanend=[s_ascanend],-1         // scan_end=scan[best_len]
+        add s_awinbest=s_awindow,s_vbestlen    // update with new best_len
+        cmp.ne.unc p_bn2,p0=2,s_vbestlen       // set if bestlen != 2
+        ;;
 }{.mib // Cycle 4 (short)
 }{.mib // Cycle 4 (short)
-       // scan_end1=scan[best_len-1] NB: s_ascanend reset:
-       ld1.nt1 s_vscanend1=[s_ascanend],1
-       cmp.lt.unc p_nnc,p0=s_vlen,s_tm7        // compare with nice_match
+        // scan_end1=scan[best_len-1] NB: s_ascanend reset:
+        ld1.nt1 s_vscanend1=[s_ascanend],1
+        cmp.lt.unc p_nnc,p0=s_vlen,s_tm7       // compare with nice_match
 (p_nnc)        br.cond.dptk.many .next_iter
 (p_nnc)        br.cond.dptk.many .next_iter
-       ;;
+        ;;
 }
 .terminate:
 {.mii  // Cycle 0/1
 }
 .terminate:
 {.mii  // Cycle 0/1
-       nop.m 0
-       movi0 ar.lc=s_lcsave
-       movi0 pr=s_prsave,-1
+        nop.m 0
+        movi0 ar.lc=s_lcsave
+        movi0 pr=s_prsave,-1
 }{.mbb
 }{.mbb
-       nop.m 0
-       nop.b 0
-       br.ret.sptk.many rp     // ret0 is identical to best_len
-       ;;
+        nop.m 0
+        nop.b 0
+        br.ret.sptk.many rp    // ret0 is identical to best_len
+        ;;
 }
 }
-       .endp
+        .endp
 
 
-       .global match_init
-       .proc match_init
+        .global match_init
+        .proc match_init
 match_init:
 match_init:
-       sub ret0=ret0,ret0
-       br.ret.sptk.many rp
-       .endp
+        sub ret0=ret0,ret0
+        br.ret.sptk.many rp
+        .endp
 
 # else
  error: this asm version is for 386 or 680x0 or ia64 only
 
 # else
  error: this asm version is for 386 or 680x0 or ia64 only