projects
/
debian
/
gnuradio
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Imported Upstream version 3.2.2
[debian/gnuradio]
/
gnuradio-core
/
src
/
lib
/
filter
/
complex_dotprod_sse.S
diff --git a/gnuradio-core/src/lib/filter/complex_dotprod_sse.S b/gnuradio-core/src/lib/filter/complex_dotprod_sse.S
index 97ad9b665f0866eaeb650ef5b3fa07cc69a64a16..ff5ef5d0cb56a8c1f9916091776fc4f33319e1da 100644
(file)
--- a/gnuradio-core/src/lib/filter/complex_dotprod_sse.S
+++ b/gnuradio-core/src/lib/filter/complex_dotprod_sse.S
@@ -81,7 +81,7 @@ GLOB_SYMB(complex_dotprod_sse):
jmp .L1_test
.p2align 4
jmp .L1_test
.p2align 4
-.loop1:
+.Loop1:
pxor %mm0, %mm0
punpcklwd 0(%eax), %mm0
pxor %mm0, %mm0
punpcklwd 0(%eax), %mm0
@@ -95,7 +95,7 @@ GLOB_SYMB(complex_dotprod_sse):
addps %xmm0, %xmm4
.L1_test:
decl %ecx
addps %xmm0, %xmm4
.L1_test:
decl %ecx
- jge .loop1
+ jge .Loop1
# set up for primary loop which is unrolled 4 times
# set up for primary loop which is unrolled 4 times
@@ -105,7 +105,7 @@ GLOB_SYMB(complex_dotprod_sse):
movaps %xmm5, %xmm7
shrl $2, %ecx # n_2_complex_blocks / 4
movaps %xmm5, %xmm7
shrl $2, %ecx # n_2_complex_blocks / 4
- je .cleanup # if zero, take short path
+ je .Lcleanup # if zero, take short path
# finish setup and loop priming
# finish setup and loop priming
@@ -129,7 +129,7 @@ GLOB_SYMB(complex_dotprod_sse):
# hence enter loop at top
.p2align 4
# hence enter loop at top
.p2align 4
-.loop2:
+.Loop2:
mulps (%edx), %xmm0
addps %xmm2, %xmm6
mulps (%edx), %xmm0
addps %xmm2, %xmm6
@@ -169,7 +169,7 @@ GLOB_SYMB(complex_dotprod_sse):
addl $0x40, %edx
addl $0x10, %eax
decl %ecx
addl $0x40, %edx
addl $0x10, %eax
decl %ecx
- jne .loop2
+ jne .Loop2
# OK, now we're done with all the multiplies, but
# we still need to handle the unaccumulated
# OK, now we're done with all the multiplies, but
# we still need to handle the unaccumulated
@@ -188,7 +188,7 @@ GLOB_SYMB(complex_dotprod_sse):
# At this point, xmm4 contains 2x2 partial sums. We need
# to compute a "horizontal complex add" across xmm4.
# At this point, xmm4 contains 2x2 partial sums. We need
# to compute a "horizontal complex add" across xmm4.
-.cleanup: # xmm4 = r1 i2 r3 i4
+.Lcleanup: # xmm4 = r1 i2 r3 i4
movl 20(%ebp), %eax # @result
movhlps %xmm4, %xmm0 # xmm0 = ?? ?? r1 i2
addps %xmm4, %xmm0 # xmm0 = ?? ?? r1+r3 i2+i4
movl 20(%ebp), %eax # @result
movhlps %xmm4, %xmm0 # xmm0 = ?? ?? r1 i2
addps %xmm4, %xmm0 # xmm0 = ?? ?? r1+r3 i2+i4
@@ -200,3 +200,7 @@ GLOB_SYMB(complex_dotprod_sse):
FUNC_TAIL(complex_dotprod_sse)
.ident "Hand coded x86 SSE assembly"
FUNC_TAIL(complex_dotprod_sse)
.ident "Hand coded x86 SSE assembly"
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif