#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
+# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
#
jmp .L1_test
.p2align 4
-.loop1:
+.Loop1:
pxor %mm0, %mm0
punpcklwd 0(%rdi), %mm0
addps %xmm0, %xmm4
.L1_test:
dec %rax
- jge .loop1
+ jge .Loop1
# set up for primary loop which is unrolled 4 times
shr $2, %rdx # n_2_complex_blocks / 4
movaps %xmm5, %xmm7
- je .cleanup # if zero, take short path
+ je .Lcleanup # if zero, take short path
# finish setup and loop priming
# hence enter loop at top
.p2align 4
-.loop2:
+.Loop2:
mulps (%rsi), %xmm0
addps %xmm2, %xmm6
add $0x40, %rsi
add $0x10, %rdi
dec %rdx
- jne .loop2
+ jne .Loop2
# OK, now we're done with all the multiplies, but
# we still need to handle the unaccumulated
# At this point, xmm4 contains 2x2 partial sums. We need
# to compute a "horizontal complex add" across xmm4.
-.cleanup: # xmm4 = r1 i2 r3 i4
+.Lcleanup: # xmm4 = r1 i2 r3 i4
movhlps %xmm4, %xmm0 # xmm0 = ?? ?? r1 i2 (high half of xmm4 -> low half of xmm0)
addps %xmm4, %xmm0 # xmm0 = ?? ?? r1+r3 i2+i4
movlps %xmm0, (%rcx) # store low 2x32 bits (complex) to memory
FUNC_TAIL(complex_dotprod_sse)
.ident "Hand coded x86_64 SSE assembly"
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif