X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=gnuradio-core%2Fsrc%2Flib%2Ffilter%2Ffcomplex_dotprod_sse.S;h=e390c275cf601bcc4c37cc451eade0ae3ec18c62;hb=ea29b08aeb54227e6628f655ccfdb96fe4d8c378;hp=475f03d3d51eb3d003f1037a4a8fe2f7d5a52947;hpb=18a684bf3dc144c48fc4cc6cc72f5070febd8074;p=debian%2Fgnuradio diff --git a/gnuradio-core/src/lib/filter/fcomplex_dotprod_sse.S b/gnuradio-core/src/lib/filter/fcomplex_dotprod_sse.S index 475f03d3..e390c275 100644 --- a/gnuradio-core/src/lib/filter/fcomplex_dotprod_sse.S +++ b/gnuradio-core/src/lib/filter/fcomplex_dotprod_sse.S @@ -5,7 +5,7 @@ # # GNU Radio is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) +# the Free Software Foundation; either version 3, or (at your option) # any later version. # # GNU Radio is distributed in the hope that it will be useful, @@ -81,7 +81,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): jmp .L1_test .p2align 4 -.loop1: +.Loop1: movlps 0(%eax), %xmm0 shufps $0x50, %xmm0, %xmm0 # b01010000 @@ -92,7 +92,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): addps %xmm0, %xmm4 .L1_test: decl %ecx - jge .loop1 + jge .Loop1 # set up for primary loop which is unrolled 4 times @@ -102,7 +102,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): movaps %xmm5, %xmm7 shrl $2, %ecx # n_2_complex_blocks / 4 - je .cleanup # if zero, take short path + je .Lcleanup # if zero, take short path # finish setup and loop priming @@ -120,7 +120,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): # hence enter loop at top .p2align 4 -.loop2: +.Loop2: addps %xmm2, %xmm6 movlps 0x10(%eax), %xmm2 @@ -152,7 +152,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): addl $0x40, %edx addl $0x20, %eax decl %ecx - jne .loop2 + jne .Loop2 # OK, now we've done with all the multiplies, but # we still need to handle the unaccumulated @@ -171,7 +171,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): # At this point, xmm4 contains 2x2 partial sums. We need # to compute a "horizontal complex add" across xmm4. -.cleanup: # xmm4 = r1 i2 r3 i4 +.Lcleanup: # xmm4 = r1 i2 r3 i4 movl 20(%ebp), %eax # @result movhlps %xmm4, %xmm0 # xmm0 = ?? ?? r1 r2 addps %xmm4, %xmm0 # xmm0 = ?? ?? r1+r3 i2+i4 @@ -182,3 +182,7 @@ GLOB_SYMB(fcomplex_dotprod_sse): FUNC_TAIL(fcomplex_dotprod_sse) .ident "Hand coded x86 SSE assembly" + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif