#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
+# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
jmp .L1_test
.p2align 4
-.loop1:
+.float_dotprod_sse_loop1:
movaps (%eax), %xmm0
mulps (%edx), %xmm0
addl $0x10, %edx
addps %xmm0, %xmm4
.L1_test:
decl %ecx
- jge .loop1
+ jge .float_dotprod_sse_loop1
# set up for primary loop which is unrolled 4 times
movaps %xmm5, %xmm7
shrl $2, %ecx # n_4_float_blocks / 4
- je .cleanup # if zero, take short path
+ je .Lcleanup # if zero, take short path
# finish setup and loop priming
# hence enter loop at top
.p2align 4
-.loop2:
+.float_dotprod_sse_loop2:
mulps (%edx), %xmm0
addps %xmm2, %xmm6
movaps 0x20(%eax), %xmm2
addl $0x40, %edx
addl $0x40, %eax
decl %ecx
- jne .loop2
+ jne .float_dotprod_sse_loop2
# OK, now we're done with all the multiplies, but
# we still need to handle the unaccumulated
# to compute a "horizontal add" across xmm4.
# This is a fairly nasty operation...
-.cleanup: # xmm4 = d1 d2 d3 d4
+.Lcleanup: # xmm4 = d1 d2 d3 d4
xorps %xmm0, %xmm0 # xmm0 = 0 0 0 0 (may be unnecessary)
movhlps %xmm4, %xmm0 # xmm0 = 0 0 d1 d2
addps %xmm4, %xmm0 # xmm0 = d1 d2 d1+d3 d2+d4
FUNC_TAIL(float_dotprod_sse)
.ident "Hand coded x86 SSE assembly"
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif