X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=gnuradio-core%2Fsrc%2Flib%2Ffilter%2Ffloat_dotprod_sse64.S;h=4c5ad95cd0abecea0cf81eb2b6472b4af7789221;hb=ea29b08aeb54227e6628f655ccfdb96fe4d8c378;hp=c5e56f368e68f488508d24ed9b0135fd4af3c05f;hpb=09a1e803a9e6587c78d20cdf16891e5295874668;p=debian%2Fgnuradio diff --git a/gnuradio-core/src/lib/filter/float_dotprod_sse64.S b/gnuradio-core/src/lib/filter/float_dotprod_sse64.S index c5e56f36..4c5ad95c 100644 --- a/gnuradio-core/src/lib/filter/float_dotprod_sse64.S +++ b/gnuradio-core/src/lib/filter/float_dotprod_sse64.S @@ -77,7 +77,7 @@ GLOB_SYMB(float_dotprod_sse): jmp .L1_test .p2align 4 -.loop1: +.Loop1: movaps (%rsi), %xmm0 mulps (%rdi), %xmm0 add $0x10, %rdi @@ -85,7 +85,7 @@ GLOB_SYMB(float_dotprod_sse): addps %xmm0, %xmm4 .L1_test: dec %rax - jge .loop1 + jge .Loop1 # set up for primary loop which is unrolled 4 times @@ -94,7 +94,7 @@ GLOB_SYMB(float_dotprod_sse): movaps %xmm5, %xmm7 shr $2, %rdx # n_4_float_blocks / 4 - je .cleanup # if zero, take short path + je .Lcleanup # if zero, take short path # finish setup and loop priming @@ -107,7 +107,7 @@ GLOB_SYMB(float_dotprod_sse): # hence enter loop at top .p2align 4 -.loop2: +.Loop2: mulps (%rdi), %xmm0 addps %xmm2, %xmm6 movaps 0x20(%rsi), %xmm2 @@ -127,7 +127,7 @@ GLOB_SYMB(float_dotprod_sse): add $0x40, %rdi add $0x40, %rsi dec %rdx - jne .loop2 + jne .Loop2 # OK, now we've done with all the multiplies, but # we still need to handle the unaccumulated @@ -147,7 +147,7 @@ GLOB_SYMB(float_dotprod_sse): # to compute a "horizontal add" across xmm4. # This is a fairly nasty operation... -.cleanup: # xmm4 = d1 d2 d3 d4 +.Lcleanup: # xmm4 = d1 d2 d3 d4 xorps %xmm0, %xmm0 # xmm0 = 0 0 0 0 (may be unnecessary) movhlps %xmm4, %xmm0 # xmm0 = 0 0 d1 d2 addps %xmm4, %xmm0 # xmm0 = d1 d2 d1+d3 d2+d4 @@ -159,3 +159,7 @@ GLOB_SYMB(float_dotprod_sse): FUNC_TAIL(float_dotprod_sse) .ident "Hand coded x86_64 SSE assembly" + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif