1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_shift_q15.c
10 * Description: Shifts the elements of a Q15 vector by a specified number of bits.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated.
29 * Version 0.0.7 2010/06/10
30 * Misra-C changes done
31 * -------------------------------------------------------------------- */
45 * @brief Shifts the elements of a Q15 vector a specified number of bits.
46 * @param[in] *pSrc points to the input vector
47 * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
48 * @param[out] *pDst points to the output vector
49 * @param[in] blockSize number of samples in the vector
52 * <b>Scaling and Overflow Behavior:</b>
54 * The function uses saturating arithmetic.
55 * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
64 uint32_t blkCnt; /* loop counter */
65 uint8_t sign; /* Sign of shiftBits */
69 /* Run the below code for Cortex-M4 and Cortex-M3 */
71 q15_t in1, in2; /* Temporary variables */
75 blkCnt = blockSize >> 2u;
77 /* Getting the sign of shiftBits */
78 sign = (shiftBits & 0x80);
80 /* If the shift value is positive then do left shift (with saturation) else right shift */
83 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
84 ** a second loop below computes the remaining 1 to 3 samples. */
90 /* C = A << shiftBits */
91 /* Shift the inputs and then store the results in the destination buffer. */
92 #ifndef ARM_MATH_BIG_ENDIAN
94 *__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
95 __SSAT((in2 << shiftBits), 16), 16);
99 *__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
100 __SSAT((in1 << shiftBits), 16), 16);
102 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
107 #ifndef ARM_MATH_BIG_ENDIAN
109 *__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
110 __SSAT((in2 << shiftBits), 16), 16);
114 *__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
115 __SSAT((in1 << shiftBits), 16), 16);
117 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
119 /* Decrement the loop counter */
123 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
124 ** No loop unrolling is used. */
125 blkCnt = blockSize % 0x4u;
129 /* C = A << shiftBits */
130 /* Shift and then store the results in the destination buffer. */
131 *pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
133 /* Decrement the loop counter */
139 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
140 ** a second loop below computes the remaining 1 to 3 samples. */
146 /* C = A >> (-shiftBits); shiftBits is negative in this branch */
147 /* Shift the inputs and then store the results in the destination buffer. */
148 #ifndef ARM_MATH_BIG_ENDIAN
150 *__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
151 (in2 >> -shiftBits), 16);
155 *__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
156 (in1 >> -shiftBits), 16);
158 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
163 #ifndef ARM_MATH_BIG_ENDIAN
165 *__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
166 (in2 >> -shiftBits), 16);
170 *__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
171 (in1 >> -shiftBits), 16);
173 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
175 /* Decrement the loop counter */
179 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
180 ** No loop unrolling is used. */
181 blkCnt = blockSize % 0x4u;
185 /* C = A >> shiftBits */
186 /* Shift the inputs and then store the results in the destination buffer. */
187 *pDst++ = (*pSrc++ >> -shiftBits);
189 /* Decrement the loop counter */
196 /* Run the below code for Cortex-M0 */
198 /* Getting the sign of shiftBits */
199 sign = (shiftBits & 0x80);
201 /* If the shift value is positive then do left shift (with saturation) else right shift */
204 /* Initialize blkCnt with number of samples */
209 /* C = A << shiftBits */
210 /* Shift and then store the results in the destination buffer. */
211 *pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
213 /* Decrement the loop counter */
219 /* Initialize blkCnt with number of samples */
224 /* C = A >> shiftBits */
225 /* Shift the inputs and then store the results in the destination buffer. */
226 *pDst++ = (*pSrc++ >> -shiftBits);
228 /* Decrement the loop counter */
233 #endif /* #ifndef ARM_MATH_CM0 */
238 * @} end of shift group