]> git.gag.com Git - debian/gzip/blobdiff - match.c
Imported Debian patch 1.3.9-1
[debian/gzip] / match.c
diff --git a/match.c b/match.c
deleted file mode 100644 (file)
index 50b48f8..0000000
--- a/match.c
+++ /dev/null
@@ -1,758 +0,0 @@
-/* match.s -- optional optimized asm version of longest match in deflate.c
- * Copyright (C) 2002 Free Software Foundation, Inc.
- * Copyright (C) 1992-1993 Jean-loup Gailly
- * This is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License, see the file COPYING.
- *
- * The 68020 version has been written by Francesco Potorti` <pot@cnuce.cnr.it>
- * with adaptations by Carsten Steger <stegerc@informatik.tu-muenchen.de>,
- * Andreas Schwab <schwab@lamothe.informatik.uni-dortmund.de> and
- * Kristoffer Eriksson <ske@pkmab.se>
- *
- * The ia64 version has been written by Sverre Jarp (HP Labs) 2001-2002.
- * Unwind directives and some reformatting for better readability added by
- * David Mosberger-Tang <davidm@hpl.hp.com>.
- */
-
-/* $Id: match.S,v 0.14 1993/06/11 18:33:24 jloup Exp $ */
-
-/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
- * external symbols with an underline character '_'.
- */
-#ifdef NO_UNDERLINE
-#  define _prev             prev
-#  define _window           window
-#  define _match_start     match_start
-#  define _prev_length     prev_length
-#  define _good_match      good_match
-#  define _nice_match      nice_match
-#  define _strstart        strstart
-#  define _max_chain_length max_chain_length
-
-#  define _match_init       match_init
-#  define _longest_match    longest_match
-#endif
-
-#ifdef DYN_ALLOC
-  error: DYN_ALLOC not yet supported in match.s
-#endif
-
-#if defined(i386) || defined(_I386) || defined(__i386) || defined(__i386__)
-
-/* This version is for 386 Unix or OS/2 in 32 bit mode.
- * Warning: it uses the AT&T syntax: mov source,dest
- * This file is only optional. If you want to force the C version,
- * add -DNO_ASM to CFLAGS in Makefile and set OBJA to an empty string.
- * If you have reduced WSIZE in gzip.h, then change its value below.
- * This version assumes static allocation of the arrays (-DDYN_ALLOC not used).
- */
-
-       .file   "match.S"
-
-#define MAX_MATCH      258
-#define MAX_MATCH2     $128 /* MAX_MATCH/2-1 */
-#define MIN_MATCH      3
-#define    WSIZE       $32768
-#define MAX_DIST       WSIZE - MAX_MATCH - MIN_MATCH - 1
-
-       .globl  _match_init
-       .globl  _longest_match
-
-       .text
-
-_match_init:
-       ret
-
-/*-----------------------------------------------------------------------
- * Set match_start to the longest match starting at the given string and
- * return its length. Matches shorter or equal to prev_length are discarded,
- * in which case the result is equal to prev_length and match_start is
- * garbage.
- * IN assertions: cur_match is the head of the hash chain for the current
- *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
- */
-
-_longest_match:        /* int longest_match(cur_match) */
-
-#define cur_match   20(%esp)
-     /* return address */               /* esp+16 */
-        push    %ebp                    /* esp+12 */
-        push    %edi                    /* esp+8  */
-       push    %esi                    /* esp+4  */
-       push    %ebx                    /* esp    */
-
-/*
- *      match        equ esi
- *      scan         equ edi
- *      chain_length equ ebp
- *      best_len     equ ebx
- *      limit        equ edx
- */
-       mov     cur_match,%esi
-        mov    _max_chain_length,%ebp /* chain_length = max_chain_length */
-       mov     _strstart,%edi
-       mov     %edi,%edx
-       sub     MAX_DIST,%edx          /* limit = strstart-MAX_DIST */
-       jae     limit_ok
-       sub     %edx,%edx              /* limit = NIL */
-limit_ok:
-        add    $2+_window,%edi        /* edi = offset(window+strstart+2) */
-        mov    _prev_length,%ebx      /* best_len = prev_length */
-        movw   -3(%ebx,%edi),%ax      /* ax = scan[best_len-1..best_len] */
-        movw   -2(%edi),%cx           /* cx = scan[0..1] */
-       cmp     _good_match,%ebx       /* do we have a good match already? */
-        jb      do_scan
-       shr     $2,%ebp                /* chain_length >>= 2 */
-        jmp     do_scan
-
-        .align  4
-long_loop:
-/* at this point, edi == scan+2, esi == cur_match */
-        movw   -3(%ebx,%edi),%ax       /* ax = scan[best_len-1..best_len] */
-        movw     -2(%edi),%cx           /* cx = scan[0..1] */
-short_loop:
-/*
- * at this point, di == scan+2, si == cur_match,
- * ax = scan[best_len-1..best_len] and cx = scan[0..1]
- */
-        and     WSIZE-1, %esi
-        movw    _prev(%esi,%esi),%si    /* cur_match = prev[cur_match] */
-                                        /* top word of esi is still 0 */
-        cmp     %edx,%esi              /* cur_match <= limit ? */
-        jbe     the_end
-        dec     %ebp                    /* --chain_length */
-        jz      the_end
-do_scan:
-        cmpw    _window-1(%ebx,%esi),%ax/* check match at best_len-1 */
-        jne     short_loop
-        cmpw    _window(%esi),%cx       /* check min_match_length match */
-        jne     short_loop
-
-        lea     _window+2(%esi),%esi    /* si = match */
-        mov     %edi,%eax               /* ax = scan+2 */
-        mov    MAX_MATCH2,%ecx         /* scan for at most MAX_MATCH bytes */
-        rep;    cmpsw                   /* loop until mismatch */
-        je      maxmatch                /* match of length MAX_MATCH? */
-mismatch:
-        movb    -2(%edi),%cl        /* mismatch on first or second byte? */
-        subb    -2(%esi),%cl        /* cl = 0 if first bytes equal */
-        xchg    %edi,%eax           /* edi = scan+2, eax = end of scan */
-        sub     %edi,%eax           /* eax = len */
-       sub     %eax,%esi           /* esi = cur_match + 2 + offset(window) */
-       sub     $2+_window,%esi     /* esi = cur_match */
-        subb    $1,%cl              /* set carry if cl == 0 (cannot use DEC) */
-        adc     $0,%eax             /* eax = carry ? len+1 : len */
-        cmp     %ebx,%eax           /* len > best_len ? */
-        jle     long_loop
-        mov     %esi,_match_start       /* match_start = cur_match */
-        mov     %eax,%ebx               /* ebx = best_len = len */
-        cmp     _nice_match,%eax        /* len >= nice_match ? */
-        jl      long_loop
-the_end:
-        mov     %ebx,%eax               /* result = eax = best_len */
-       pop     %ebx
-        pop     %esi
-        pop     %edi
-        pop     %ebp
-        ret
-maxmatch:
-        cmpsb
-        jmp     mismatch
-
-#else
-
-/* ======================== 680x0 version ================================= */
-
-#if defined(m68k)||defined(mc68k)||defined(__mc68000__)||defined(__MC68000__)
-#  ifndef mc68000
-#    define mc68000
-#  endif
-#endif
-
-#if defined(__mc68020__) || defined(__MC68020__) || defined(sysV68)
-#  ifndef mc68020
-#    define mc68020
-#  endif
-#endif
-
-#if defined(mc68020) || defined(mc68000)
-
-#if (defined(mc68020) || defined(NeXT)) && !defined(UNALIGNED_OK)
-#  define UNALIGNED_OK
-#endif
-
-#ifdef sysV68  /* Try Motorola Delta style */
-
-#  define GLOBAL(symbol)       global  symbol
-#  define TEXT                 text
-#  define FILE(filename)       file    filename
-#  define invert_maybe(src,dst)        dst,src
-#  define imm(data)            &data
-#  define reg(register)                %register
-
-#  define addl                 add.l
-#  define addql                        addq.l
-#  define blos                 blo.b
-#  define bhis                 bhi.b
-#  define bras                 bra.b
-#  define clrl                 clr.l
-#  define cmpmb                        cmpm.b
-#  define cmpw                 cmp.w
-#  define cmpl                 cmp.l
-#  define lslw                 lsl.w
-#  define lsrl                 lsr.l
-#  define movel                        move.l
-#  define movew                        move.w
-#  define moveb                        move.b
-#  define moveml               movem.l
-#  define subl                 sub.l
-#  define subw                 sub.w
-#  define subql                        subq.l
-
-#  define IndBase(bd,An)       (bd,An)
-#  define IndBaseNdxl(bd,An,Xn)        (bd,An,Xn.l)
-#  define IndBaseNdxw(bd,An,Xn)        (bd,An,Xn.w)
-#  define predec(An)           -(An)
-#  define postinc(An)          (An)+
-
-#else /* default style (Sun 3, NeXT, Amiga, Atari) */
-
-#  define GLOBAL(symbol)       .globl  symbol
-#  define TEXT                 .text
-#  define FILE(filename)       .even
-#  define invert_maybe(src,dst)        src,dst
-#  if defined(sun) || defined(mc68k)
-#    define imm(data)          #data
-#  else
-#    define imm(data)          \#data
-#  endif
-#  define reg(register)                register
-
-#  define blos                 bcss
-#  if defined(sun) || defined(mc68k)
-#    define movel              movl
-#    define movew              movw
-#    define moveb              movb
-#  endif
-#  define IndBase(bd,An)       An@(bd)
-#  define IndBaseNdxl(bd,An,Xn)        An@(bd,Xn:l)
-#  define IndBaseNdxw(bd,An,Xn)        An@(bd,Xn:w)
-#  define predec(An)           An@-
-#  define postinc(An)          An@+
-
-#endif /* styles */
-
-#define Best_Len       reg(d0)         /* unsigned */
-#define Cur_Match      reg(d1)         /* Ipos */
-#define Loop_Counter   reg(d2)         /* int */
-#define Scan_Start     reg(d3)         /* unsigned short */
-#define Scan_End       reg(d4)         /* unsigned short */
-#define Limit          reg(d5)         /* IPos */
-#define Chain_Length   reg(d6)         /* unsigned */
-#define Scan_Test      reg(d7)
-#define Scan           reg(a0)         /* *uch */
-#define Match          reg(a1)         /* *uch */
-#define Prev_Address   reg(a2)         /* *Pos */
-#define Scan_Ini       reg(a3)         /* *uch */
-#define Match_Ini      reg(a4)         /* *uch */
-#define Stack_Pointer  reg(sp)
-
-#define MAX_MATCH      258
-#define MIN_MATCH      3
-#define WSIZE          32768
-#define MAX_DIST       (WSIZE - MAX_MATCH - MIN_MATCH - 1)
-
-       GLOBAL  (_match_init)
-       GLOBAL  (_longest_match)
-
-       TEXT
-
-       FILE    ("match.S")
-
-_match_init:
-       rts
-
-/*-----------------------------------------------------------------------
- * Set match_start to the longest match starting at the given string and
- * return its length. Matches shorter or equal to prev_length are discarded,
- * in which case the result is equal to prev_length and match_start is
- * garbage.
- * IN assertions: cur_match is the head of the hash chain for the current
- *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
- */
-
-/* int longest_match (cur_match) */
-
-#ifdef UNALIGNED_OK
-#  define pushreg      15928           /* d2-d6/a2-a4 */
-#  define popreg       7292
-#else
-#  define pushreg      16184           /* d2-d7/a2-a4 */
-#  define popreg       7420
-#endif
-
-_longest_match:
-       movel   IndBase(4,Stack_Pointer),Cur_Match
-       moveml  imm(pushreg),predec(Stack_Pointer)
-       movel   _max_chain_length,Chain_Length
-       movel   _prev_length,Best_Len
-       movel   imm(_prev),Prev_Address
-       movel   imm(_window+MIN_MATCH),Match_Ini
-       movel   _strstart,Limit
-       movel   Match_Ini,Scan_Ini
-       addl    Limit,Scan_Ini
-       subw    imm(MAX_DIST),Limit
-       bhis    L__limit_ok
-       clrl    Limit
-L__limit_ok:
-       cmpl    invert_maybe(_good_match,Best_Len)
-       blos    L__length_ok
-       lsrl    imm(2),Chain_Length
-L__length_ok:
-       subql   imm(1),Chain_Length
-#ifdef UNALIGNED_OK
-       movew   IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
-       movew   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-#else
-       moveb   IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
-       lslw    imm(8),Scan_Start
-       moveb   IndBase(-MIN_MATCH+1,Scan_Ini),Scan_Start
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-       lslw    imm(8),Scan_End
-       moveb   IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
-#endif
-       bras    L__do_scan
-
-L__long_loop:
-#ifdef UNALIGNED_OK
-       movew   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-#else
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
-       lslw    imm(8),Scan_End
-       moveb   IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
-#endif
-
-L__short_loop:
-       lslw    imm(1),Cur_Match
-       movew   IndBaseNdxl(0,Prev_Address,Cur_Match),Cur_Match
-       cmpw    invert_maybe(Limit,Cur_Match)
-       dbls    Chain_Length,L__do_scan
-       bras    L__return
-
-L__do_scan:
-       movel   Match_Ini,Match
-       addl    Cur_Match,Match
-#ifdef UNALIGNED_OK
-       cmpw    invert_maybe(IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_End)
-       bne     L__short_loop
-       cmpw    invert_maybe(IndBase(-MIN_MATCH,Match),Scan_Start)
-       bne     L__short_loop
-#else
-       moveb   IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_Test
-       lslw    imm(8),Scan_Test
-       moveb   IndBaseNdxw(-MIN_MATCH,Match,Best_Len),Scan_Test
-       cmpw    invert_maybe(Scan_Test,Scan_End)
-       bne     L__short_loop
-       moveb   IndBase(-MIN_MATCH,Match),Scan_Test
-       lslw    imm(8),Scan_Test
-       moveb   IndBase(-MIN_MATCH+1,Match),Scan_Test
-       cmpw    invert_maybe(Scan_Test,Scan_Start)
-       bne     L__short_loop
-#endif
-
-       movew   imm((MAX_MATCH-MIN_MATCH+1)-1),Loop_Counter
-       movel   Scan_Ini,Scan
-L__scan_loop:
-       cmpmb   postinc(Match),postinc(Scan)
-       dbne    Loop_Counter,L__scan_loop
-
-       subl    Scan_Ini,Scan
-       addql   imm(MIN_MATCH-1),Scan
-       cmpl    invert_maybe(Best_Len,Scan)
-       bls     L__short_loop
-       movel   Scan,Best_Len
-       movel   Cur_Match,_match_start
-       cmpl    invert_maybe(_nice_match,Best_Len)
-       blos    L__long_loop
-L__return:
-       moveml  postinc(Stack_Pointer),imm(popreg)
-       rts
-
-#else
-
-# if defined (__ia64__)
-
-/* ======================== ia64 version ================================= */
-
-/*
- * 'longest_match.S' (assembly program for gzip for the IA-64 architecture)
- *
- * Optimised for McKinley, but with Merced-compatibility, such as MIB+MIB, used wherever
- * possible.
- *
- * Copyright: Sverre Jarp (HP Labs) 2001-2002
- *
- * See deflate.c for c-version
- * Version 2 - Optimize the outer loop
- */
-
-#include <endian.h>
-
-#if __BYTE_ORDER == ____BIG_ENDIAN
-#define  first  shl
-#define  second shr.u
-#define  count  czx1.l
-#else
-#define  first  shr.u
-#define  second shl
-#define  count  czx1.r
-#endif
-
-// 24 rotating register (r32 - r55)
-
-#define s_vmatch0              r32
-#define s_vmatch1              r33
-#define s_vmatbst              r34
-#define s_vmatbst1             r35
-#define s_amatblen             r36
-
-#define s_tm1                  r56
-#define s_tm2                  r57
-#define s_tm3                  r58
-#define s_tm4                  r59
-#define s_tm5                  r60
-#define        s_tm6                   r61
-#define s_tm7                  r62
-#define s_tm8                  r63
-
-#define s_vlen                 r31
-#define s_vstrstart            r30
-#define s_vchainlen            r29
-#define s_awinbest             r28
-#define s_vcurmatch            r27
-#define s_vlimit               r26
-#define s_vscanend             r25
-#define s_vscanend1            r24
-#define s_anicematch           r23
-#define        s_vscan0                r22
-#define s_vscan1               r21
-
-#define s_aprev                        r20
-#define s_awindow              r19
-#define s_amatchstart          r18
-#define s_ascan                        r17
-#define s_amatch               r16
-#define s_wmask                        r15
-#define s_ascanend             r14
-
-#define s_vspec_cmatch         r11             // next iteration
-#define s_lcsave               r10
-#define s_prsave               r9
-#define s_vbestlen             r8              // return register
-
-#define s_vscan3               r3
-#define s_vmatch3              r2
-
-#define p_no                   p2
-#define p_yes                  p3
-#define p_shf                  p4              //
-#define p_bn2                  p5              // Use in loop (indicating bestlen != 2)
-
-#define p_nbs                  p9              // not new best_len
-#define p_nnc                  p10             // not nice_length
-#define p_ll                   p11
-#define p_end                  p12
-
-#define MAX_MATCH              258
-#define MIN_MATCH                4
-#define WSIZE                32768
-#define MAX_DIST               WSIZE - MAX_MATCH - MIN_MATCH - 1
-
-#define        R_INPUT                 1
-#define R_LOCAL                        31
-#define        R_OUTPUT                0
-#define R_ROTATING             24
-#define MLAT                   3
-#define SHLAT                  2
-
-#define        mova                    mov
-#define movi0                  mov
-#define cgtu                   cmp.gt.unc
-#define cgeu                   cmp.ge.unc
-#define cneu                   cmp.ne.unc
-
-       .global longest_match
-       .proc longest_match
-       .align 32
-longest_match:
-// --  Cycle: 0
-       .prologue
-{.mmi
-        alloc r2=ar.pfs,R_INPUT,R_LOCAL,R_OUTPUT,R_ROTATING
-       .rotr scan[MLAT+2], match[MLAT+2], shscan0[SHLAT+1], \
-             shscan1[SHLAT+1], shmatch0[SHLAT+1], shmatch1[SHLAT+1]
-       .rotp lc[MLAT+SHLAT+2]
-       mova s_vspec_cmatch=in0 // cur_match from input register
-       add s_tm1=@gprel(strstart),gp // a(a(strstart))
-}{.mmi
-       add s_tm3=@gprel(prev_length),gp // a(a(prev_length))
-       add s_tm5=@ltoff(window),gp // a(a(window))
-       add s_tm6=@ltoff(prev),gp // a(a(prev))
-       ;;
-}{.mmb //  Cycle: 1
-       ld4 s_vstrstart=[s_tm1] // strstart
-       ld4 s_vbestlen=[s_tm3] // best_len = prev_length
-       brp.loop.imp .cmploop,.cmploop+48
-}{.mli
-       add s_tm2=@gprel(max_chain_length),gp // a(a(max_chain_length))
-       movl s_wmask=WSIZE-1
-       ;;
-}{.mmi //  Cycle: 2
-       ld8 s_aprev=[s_tm6] // a(prev)
-       ld8 s_awindow=[s_tm5] // a(window)
-       .save pr, s_prsave
-       movi0 s_prsave=pr // save predicates
-}{.mmi
-       add s_tm4=@gprel(good_match),gp // a(a(good_match))
-       add s_tm7=@ltoff(nice_match),gp // a(a(nice_match))
-       add s_tm8=@ltoff(match_start),gp // a(match_start)
-       ;;
-}{.mmi //  Cycle: 3
-       ld8 s_anicematch=[s_tm7] // a(nice_match)
-       ld8 s_amatchstart=[s_tm8] // a(match_start)
-       .save ar.lc, s_lcsave
-       movi0 s_lcsave=ar.lc // save loop count register
-}{.mmi
-       .body
-       add s_tm1=-(MAX_MATCH + MIN_MATCH),s_wmask // maxdist
-       cmp.eq p_ll,p0=r0,r0 // parallel compare initialized as 'true'
-       mova s_vcurmatch=s_vspec_cmatch
-       ;;
-}{.mmi //  Cycle: 4
-       ld4 s_vchainlen=[s_tm2] // chain_length=max_chain_length
-       ld4 s_tm4=[s_tm4] // v(good_match)
-       add s_ascan=s_awindow,s_vstrstart // scan=window + strstart
-}{.mmi
-       sub s_vlimit=s_vstrstart, s_tm1 // limit=strstart - MAX_DIST
-       add s_amatch=s_awindow,s_vspec_cmatch // match=window + cur_match
-       and s_vspec_cmatch =s_vspec_cmatch,s_wmask
-       ;;
-}{.mmi //  Cycle: 5
-       add s_amatblen=s_amatch,s_vbestlen //
-       cneu p_bn2,p0=2,s_vbestlen // set if bestlen != 2
-       add s_ascanend=s_ascan,s_vbestlen // compute a(scan) + best_len
-}{.mmi
-       ld1 s_vscan0=[s_ascan],1 // NB: s_ascan++
-       ld1 s_vmatch0=[s_amatch],1
-       cgtu p0,p_no=s_vlimit,r0 // is result positive ?
-       ;;
-}{.mmi //  Cycle: 6
-       ld1.nt1 s_vscan1=[s_ascan],2 // NB: s_ascan+3 in total
-       ld1.nt1 s_vmatch1=[s_amatch],2
-       add s_awinbest=s_awindow,s_vbestlen //
-       ;;
-}{.mmi //  Cycle: 7
-       ld1.nt1 s_vscanend=[s_ascanend],-1 // scan_end=scan[best_len]
-       ld1.nt1 s_vmatbst=[s_amatblen],-1
-(p_no) mova s_vlimit=r0
-       ;;
-}{.mmi //  Cycle: 8
-(p_bn2)        ld1.nt1 s_vscanend1=[s_ascanend],1 // scan_end1=scan[best_len-1]
-(p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]
-       shladd s_vspec_cmatch =s_vspec_cmatch,1,s_aprev
-}{.mmi
-       cgeu p_shf,p0=s_vbestlen,s_tm4 // is (prev_length >= good_match) ?
-       ;;
-}{.mmi //  Cycle: 9
-       ld1.nt1 s_vscan3=[s_ascan]
-       ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]
-       mova    s_vlen=3
-}{.mmi
-(p_shf)        shr.u s_vchainlen=s_vchainlen,2 // (cur_len) >> 2
-       ;;
-}{.mmi //  Cycle: 10
-       ld1.nt1 s_vmatch3=[s_amatch]
-       // p_ll switched on as soon as we get a mismatch:
-       cmp.eq.and p_ll,p0=s_vmatch0,s_vscan0
-       cmp.eq.and p_ll,p0=s_vmatbst,s_vscanend
-}{.mib
-       cmp.eq.and p_ll,p0=s_vmatch1,s_vscan1
-(p_bn2)        cmp.eq.and p_ll,p0=s_vmatbst1,s_vscanend1
-(p_ll) br.cond.dpnt.many .test_more
-       ;;
-}
-
-.next_iter:
-{.mmi  // Cycle 0
-       add s_amatch=s_awindow,s_vspec_cmatch   // match=window + cur_match
-       mov s_vcurmatch=s_vspec_cmatch          // current value
-       add s_vchainlen=-1,s_vchainlen          // --chain_length
-}{.mib
-       cmp.le.unc p_end,p0=s_vspec_cmatch,s_vlimit
-       and s_vspec_cmatch=s_vspec_cmatch,s_wmask
-(p_end)        br.cond.dptk.many .terminate
-       ;;
-}{.mmi // Cycle 1
-       ld1 s_vmatch0=[s_amatch],1              // load match[0]
-       // compute prev[cur_match]:
-       shladd s_vspec_cmatch=s_vspec_cmatch,1,s_aprev
-       cmp.eq.unc p_end,p0=s_vchainlen,r0
-} {.mib
-       nop.m 0
-       add s_amatblen=s_awinbest,s_vcurmatch   // match=window + cur_match
-(p_end)        br.cond.dptk.many .terminate
-       ;;
-}{.mmi // Cycle 2 (short)
-       ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]         // get next cur_match
-       ;;
-}{.mmi // Cycle 3 (short)
-       ld1.nt1 s_vmatbst=[s_amatblen],-1       // load match[best_len]
-       cmp.ne.unc p_ll,p0=r0,r0     // parallel compare initialized as 'false'
-       ;;
-}{.mmi // Cycle 4 (short)
-       // load match[1] - - note: match += 3 (in total):
-       ld1.nt1 s_vmatch1=[s_amatch],2
-       ;;
-       // Cycle 5  (short)
-(p_bn2)        ld1.nt1 s_vmatbst1=[s_amatblen]         // load match[best_len-1]
-}{.mib // Here we (MOST LIKELY) pay a L2-fetch stall
-       // p_ll switched on as soon as we get a mismatch:
-       cmp.ne.or p_ll,p0=s_vmatch0,s_vscan0
-       cmp.ne.or p_ll,p0=s_vmatbst,s_vscanend
-(p_ll) br.cond.dptk.many .next_iter
-       ;;
-}{.mmi // Cycle 6
-       ld1.nt1 s_vmatch3=[s_amatch]
-       mova s_vlen=3
-       nop.i 0
-}{.mib
-       cmp.ne.or p_ll,p0=s_vmatch1,s_vscan1
-(p_bn2)        cmp.ne.or p_ll,p0=s_vmatbst1,s_vscanend1
-(p_ll) br.cond.dptk.many .next_iter
-       ;;
-}
-
-// We have passed the first hurdle - Are there additional matches ???
-
-.test_more:
-{.mmi  // Cycle 0
-       and s_tm3=7,s_ascan                     // get byte offset
-       and s_tm4=7,s_amatch                    // get byte offset
-       movi0 ar.ec=MLAT+SHLAT+2                // NB: One trip more than usual
-}{.mib
-       cmp.ne.unc p_no,p0=s_vscan3,s_vmatch3   // does not next one differ?
-(p_no)  br.cond.dptk.many .only3
-       ;;
-}{.mmi // Cycle 1
-       and s_tm1=-8,s_ascan    // get aligned address
-       shladd s_tm3=s_tm3,3,r0
-       movi0 ar.lc=31          // 32 times around the loop (8B at a time)
-}{.mib
-       and s_tm2=-8,s_amatch                   // get aligned address
-       shladd s_tm4=s_tm4,3,r0
-       nop.b 0
-       ;;
-}{.mmi // Cycle 2
-       ld8.nt1 scan[1]=[s_tm1],8                       // load first chunk
-       sub s_tm5=64,s_tm3                              // 64 - amount
-       movi0 pr.rot=1<<16
-}{.mmi
-       ld8.nt1 match[1]=[s_tm2],8      // load first chunk
-       sub s_tm6=64,s_tm4              // 64 - amount
-       add s_vlen=-8,s_vlen            // will be updated at least once
-       ;;
-}
-       .align 32
-.cmploop:
-{.mmi  // Cycle 0
-(lc[0])                        ld8 scan[0]=[s_tm1],8           // next scan chunk
-(lc[MLAT+SHLAT+1])     add s_vlen=8,s_vlen
-(lc[MLAT])             first shscan0[0]=scan[MLAT+1],s_tm3
-}{.mib
-(lc[MLAT+SHLAT+1])     cmp.ne.unc p_no,p0=s_tm7,s_tm8  // break search if !=
-(lc[MLAT])             first shmatch0[0]=match[MLAT+1],s_tm4
-(p_no)                 br.cond.dpnt.many .mismatch
-                       ;;
-}{.mii  // Cycle 1
-(lc[0])                        ld8 match[0]=[s_tm2],8
-                       // shift left(le) or right(be):
-(lc[MLAT])             second shscan1[0]=scan[MLAT],s_tm5
-(lc[MLAT])             second shmatch1[0]=match[MLAT],s_tm6
-}{.mmb
-(lc[MLAT+SHLAT])       or s_tm7=shscan0[SHLAT],shscan1[SHLAT]
-(lc[MLAT+SHLAT])       or s_tm8=shmatch0[SHLAT],shmatch1[SHLAT]
-                       br.ctop.dptk.many .cmploop
-                       ;;
-}{.mfi
-       mov s_vlen=258
-       nop.f 0
-}{.mfi
-       nop.f 0    // realign
-       ;;
-}
-.mismatch:
-{.mii  // Cycle 0 (short)
-(p_no) pcmp1.eq s_tm2=s_tm7,s_tm8      // find first non-matching character
-       nop.i 0
-       ;;
-       // Cycle 1 (short)
-(p_no) count s_tm1=s_tm2
-       ;;
-}{.mib // Cycle 2 (short)
-(p_no) add s_vlen=s_vlen,s_tm1         // effective length
-       nop.i 0
-       clrrrb
-       ;;
-}
-
-.only3:
-{.mib  // Cycle 0 (short)
-       cmp.gt.unc p0,p_nbs=s_vlen,s_vbestlen           // (len > best_len) ?
-(p_nbs)        br.cond.dpnt.many .next_iter                    // if not, re-iternate
-       ;;
-}{.mmi // Cycle 1 (short)
-       ld4 s_tm7=[s_anicematch]                        // nice_match
-       st4 [s_amatchstart]= s_vcurmatch
-       add s_ascanend=s_ascan,s_vlen                   // reset with best_len
-       ;;
-}{.mmi // Cycle 2 (short)
-       mova s_vbestlen=s_vlen
-       add s_ascanend=-3,s_ascanend            // remember extra offset
-       ;;
-}{.mmi // Cycle 3 (short)
-       ld1 s_vscanend=[s_ascanend],-1          // scan_end=scan[best_len]
-       add s_awinbest=s_awindow,s_vbestlen     // update with new best_len
-       cmp.ne.unc p_bn2,p0=2,s_vbestlen        // set if bestlen != 2
-       ;;
-}{.mib // Cycle 4 (short)
-       // scan_end1=scan[best_len-1] NB: s_ascanend reset:
-       ld1.nt1 s_vscanend1=[s_ascanend],1
-       cmp.lt.unc p_nnc,p0=s_vlen,s_tm7        // compare with nice_match
-(p_nnc)        br.cond.dptk.many .next_iter
-       ;;
-}
-.terminate:
-{.mii  // Cycle 0/1
-       nop.m 0
-       movi0 ar.lc=s_lcsave
-       movi0 pr=s_prsave,-1
-}{.mbb
-       nop.m 0
-       nop.b 0
-       br.ret.sptk.many rp     // ret0 is identical to best_len
-       ;;
-}
-       .endp
-
-       .global match_init
-       .proc match_init
-match_init:
-       sub ret0=ret0,ret0
-       br.ret.sptk.many rp
-       .endp
-
-# else
- error: this asm version is for 386 or 680x0 or ia64 only 
-# endif /* __ia64__ */
-#endif /* mc68000 || mc68020 */
-#endif /* i386 || _I386   */