X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=gnuradio-core%2Fsrc%2Flib%2Ffilter%2Ffloat_dotprod_sse.S;h=31b0f3832eaa19f9606ebf5b598ad764e5f45304;hb=ea29b08aeb54227e6628f655ccfdb96fe4d8c378;hp=a3a2df399b8f0bbe076d39899086ebaae2337790;hpb=09a1e803a9e6587c78d20cdf16891e5295874668;p=debian%2Fgnuradio diff --git a/gnuradio-core/src/lib/filter/float_dotprod_sse.S b/gnuradio-core/src/lib/filter/float_dotprod_sse.S index a3a2df39..31b0f383 100644 --- a/gnuradio-core/src/lib/filter/float_dotprod_sse.S +++ b/gnuradio-core/src/lib/filter/float_dotprod_sse.S @@ -79,7 +79,7 @@ GLOB_SYMB(float_dotprod_sse): jmp .L1_test .p2align 4 -.loop1: +.Loop1: movaps (%eax), %xmm0 mulps (%edx), %xmm0 addl $0x10, %edx @@ -87,7 +87,7 @@ GLOB_SYMB(float_dotprod_sse): addps %xmm0, %xmm4 .L1_test: decl %ecx - jge .loop1 + jge .Loop1 # set up for primary loop which is unrolled 4 times @@ -97,7 +97,7 @@ GLOB_SYMB(float_dotprod_sse): movaps %xmm5, %xmm7 shrl $2, %ecx # n_4_float_blocks / 4 - je .cleanup # if zero, take short path + je .Lcleanup # if zero, take short path # finish setup and loop priming @@ -110,7 +110,7 @@ GLOB_SYMB(float_dotprod_sse): # hence enter loop at top .p2align 4 -.loop2: +.Loop2: mulps (%edx), %xmm0 addps %xmm2, %xmm6 movaps 0x20(%eax), %xmm2 @@ -130,7 +130,7 @@ GLOB_SYMB(float_dotprod_sse): addl $0x40, %edx addl $0x40, %eax decl %ecx - jne .loop2 + jne .Loop2 # OK, now we've done with all the multiplies, but # we still need to handle the unaccumulated @@ -150,7 +150,7 @@ GLOB_SYMB(float_dotprod_sse): # to compute a "horizontal add" across xmm4. # This is a fairly nasty operation... -.cleanup: # xmm4 = d1 d2 d3 d4 +.Lcleanup: # xmm4 = d1 d2 d3 d4 xorps %xmm0, %xmm0 # xmm0 = 0 0 0 0 (may be unnecessary) movhlps %xmm4, %xmm0 # xmm0 = 0 0 d1 d2 addps %xmm4, %xmm0 # xmm0 = d1 d2 d1+d3 d2+d4 @@ -165,3 +165,7 @@ GLOB_SYMB(float_dotprod_sse): FUNC_TAIL(float_dotprod_sse) .ident "Hand coded x86 SSE assembly" + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif