X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=gnuradio-core%2Fsrc%2Flib%2Ffilter%2Fcomplex_dotprod_sse.S;h=ff5ef5d0cb56a8c1f9916091776fc4f33319e1da;hb=refs%2Ftags%2Fupstream%2F3.2.2;hp=97ad9b665f0866eaeb650ef5b3fa07cc69a64a16;hpb=09a1e803a9e6587c78d20cdf16891e5295874668;p=debian%2Fgnuradio diff --git a/gnuradio-core/src/lib/filter/complex_dotprod_sse.S b/gnuradio-core/src/lib/filter/complex_dotprod_sse.S index 97ad9b66..ff5ef5d0 100644 --- a/gnuradio-core/src/lib/filter/complex_dotprod_sse.S +++ b/gnuradio-core/src/lib/filter/complex_dotprod_sse.S @@ -81,7 +81,7 @@ GLOB_SYMB(complex_dotprod_sse): jmp .L1_test .p2align 4 -.loop1: +.Loop1: pxor %mm0, %mm0 punpcklwd 0(%eax), %mm0 @@ -95,7 +95,7 @@ GLOB_SYMB(complex_dotprod_sse): addps %xmm0, %xmm4 .L1_test: decl %ecx - jge .loop1 + jge .Loop1 # set up for primary loop which is unrolled 4 times @@ -105,7 +105,7 @@ GLOB_SYMB(complex_dotprod_sse): movaps %xmm5, %xmm7 shrl $2, %ecx # n_2_complex_blocks / 4 - je .cleanup # if zero, take short path + je .Lcleanup # if zero, take short path # finish setup and loop priming @@ -129,7 +129,7 @@ GLOB_SYMB(complex_dotprod_sse): # hence enter loop at top .p2align 4 -.loop2: +.Loop2: mulps (%edx), %xmm0 addps %xmm2, %xmm6 @@ -169,7 +169,7 @@ GLOB_SYMB(complex_dotprod_sse): addl $0x40, %edx addl $0x10, %eax decl %ecx - jne .loop2 + jne .Loop2 # OK, now we've done with all the multiplies, but # we still need to handle the unaccumulated @@ -188,7 +188,7 @@ GLOB_SYMB(complex_dotprod_sse): # At this point, xmm4 contains 2x2 partial sums. We need # to compute a "horizontal complex add" across xmm4. -.cleanup: # xmm4 = r1 i2 r3 i4 +.Lcleanup: # xmm4 = r1 i2 r3 i4 movl 20(%ebp), %eax # @result movhlps %xmm4, %xmm0 # xmm0 = ?? ?? r1 r2 addps %xmm4, %xmm0 # xmm0 = ?? ?? r1+r3 i2+i4 @@ -200,3 +200,7 @@ GLOB_SYMB(complex_dotprod_sse): FUNC_TAIL(complex_dotprod_sse) .ident "Hand coded x86 SSE assembly" + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif