73 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
74 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
80 #include <immintrin.h>
/*
 * Aligned AVX path: interleaves the floats read from iBuffer and qBuffer into
 * complexVector as (i, q) pairs. A vector loop produces 8 pairs per iteration;
 * a scalar loop produces the remaining num_points % 8 pairs.
 *
 * qBuffer    - source of the second float of every output pair.
 * num_points - number of (i, q) pairs to produce.
 *
 * NOTE(review): the start of the signature (function name and the
 * complexVector / iBuffer parameters) and the braces are not visible in this
 * excerpt -- confirm against the full file before relying on this comment.
 */
84 const float* qBuffer,
unsigned int num_points)
86 unsigned int number = 0;
/* View the output as a flat float array laid out i0, q0, i1, q1, ... */
87 float* complexVectorPtr = (
float*)complexVector;
88 const float* iBufferPtr = iBuffer;
89 const float* qBufferPtr = qBuffer;
/* Count of full 8-float groups handled by the vector loop. */
91 const uint64_t eighthPoints = num_points / 8;
93 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
94 for(;number < eighthPoints; number++){
/* _mm256_load_ps is the aligned load: both addresses must be 32-byte aligned. */
95 iValue = _mm256_load_ps(iBufferPtr);
96 qValue = _mm256_load_ps(qBufferPtr);
/* unpacklo works per 128-bit lane, giving: i0 q0 i1 q1 | i4 q4 i5 q5 */
99 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
/* unpackhi works per 128-bit lane, giving: i2 q2 i3 q3 | i6 q6 i7 q7 */
101 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
/* 0x20 joins the low lanes of both inputs: i0 q0 i1 q1 i2 q2 i3 q3 */
103 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
104 _mm256_store_ps(complexVectorPtr, cplxValue);
105 complexVectorPtr += 8;
/* 0x31 joins the high lanes of both inputs: i4 q4 i5 q5 i6 q6 i7 q7 */
107 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
108 _mm256_store_ps(complexVectorPtr, cplxValue);
109 complexVectorPtr += 8;
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible in the vector
 * loop above. The embedded numbering jumps from 109 to 115, so the advance
 * (and the closing brace) is presumably in lines not shown -- confirm.
 */
/* Scalar tail: resume at the first point the vector loop did not cover. */
115 number = eighthPoints * 8;
116 for(; number < num_points; number++){
117 *complexVectorPtr++ = *iBufferPtr++;
118 *complexVectorPtr++ = *qBufferPtr++;
125 #include <xmmintrin.h>
/*
 * Aligned SSE path: interleaves the floats read from iBuffer and qBuffer into
 * complexVector as (i, q) pairs. A vector loop produces 4 pairs per iteration;
 * a scalar loop produces the remaining num_points % 4 pairs.
 *
 * qBuffer    - source of the second float of every output pair.
 * num_points - number of (i, q) pairs to produce.
 *
 * NOTE(review): the start of the signature (function name and the
 * complexVector / iBuffer parameters) and the braces are not visible in this
 * excerpt -- confirm against the full file before relying on this comment.
 */
129 const float* qBuffer,
unsigned int num_points)
131 unsigned int number = 0;
/* View the output as a flat float array laid out i0, q0, i1, q1, ... */
132 float* complexVectorPtr = (
float*)complexVector;
133 const float* iBufferPtr = iBuffer;
134 const float* qBufferPtr = qBuffer;
/* Count of full 4-float groups handled by the vector loop. */
136 const uint64_t quarterPoints = num_points / 4;
138 __m128 iValue, qValue, cplxValue;
139 for(;number < quarterPoints; number++){
/* _mm_load_ps is the aligned load: both addresses must be 16-byte aligned. */
140 iValue = _mm_load_ps(iBufferPtr);
141 qValue = _mm_load_ps(qBufferPtr);
/* Low halves interleaved: i0 q0 i1 q1 */
144 cplxValue = _mm_unpacklo_ps(iValue, qValue);
145 _mm_store_ps(complexVectorPtr, cplxValue);
146 complexVectorPtr += 4;
/* High halves interleaved: i2 q2 i3 q3 */
149 cplxValue = _mm_unpackhi_ps(iValue, qValue);
150 _mm_store_ps(complexVectorPtr, cplxValue);
151 complexVectorPtr += 4;
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible in the vector
 * loop above. The embedded numbering jumps from 151 to 157, so the advance
 * (and the closing brace) is presumably in lines not shown -- confirm.
 */
/* Scalar tail: resume at the first point the vector loop did not cover. */
157 number = quarterPoints * 4;
158 for(; number < num_points; number++){
159 *complexVectorPtr++ = *iBufferPtr++;
160 *complexVectorPtr++ = *qBufferPtr++;
167 #include <arm_neon.h>
/*
 * NEON path: interleaves the floats read from iBuffer and qBuffer into
 * complexVector as (i, q) pairs. A vector loop produces 4 pairs per iteration;
 * a scalar loop produces the remaining num_points % 4 pairs.
 *
 * qBuffer    - source of the second float of every output pair.
 * num_points - number of (i, q) pairs to produce.
 *
 * NOTE(review): the start of the signature, the declaration of `number`, and
 * the braces are not visible in this excerpt -- confirm against the full file.
 */
171 const float* qBuffer,
unsigned int num_points)
/* Count of full 4-float groups handled by the vector loop. */
173 unsigned int quarter_points = num_points / 4;
/* View the output as a flat float array laid out i0, q0, i1, q1, ... */
175 float* complexVectorPtr = (
float*) complexVector;
/* val[0] carries four i samples, val[1] the matching four q samples. */
177 float32x4x2_t complex_vec;
178 for(number=0; number < quarter_points; ++number) {
179 complex_vec.val[0] = vld1q_f32(iBuffer);
180 complex_vec.val[1] = vld1q_f32(qBuffer);
/* vst2q_f32 does the interleave itself: it stores i0 q0 i1 q1 i2 q2 i3 q3. */
181 vst2q_f32(complexVectorPtr, complex_vec);
/*
 * NOTE(review): no advance of iBuffer / qBuffer is visible here. The embedded
 * numbering skips 182-183, so it is presumably in lines not shown -- confirm.
 */
184 complexVectorPtr += 8;
/* Scalar tail: resume at the first point the vector loop did not cover. */
187 for(number=quarter_points * 4; number < num_points; ++number) {
188 *complexVectorPtr++ = *iBuffer++;
189 *complexVectorPtr++ = *qBuffer++;
195 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar path: writes one (i, q) float pair to complexVector for
 * each of the num_points input points, reading i from iBuffer and q from
 * qBuffer.
 *
 * qBuffer    - source of the second float of every output pair.
 * num_points - number of (i, q) pairs to produce.
 *
 * NOTE(review): the start of the signature, the declaration of `number`, and
 * the braces are not visible in this excerpt -- confirm against the full file.
 */
199 const float* qBuffer,
unsigned int num_points)
/* View the output as a flat float array laid out i0, q0, i1, q1, ... */
201 float* complexVectorPtr = (
float*)complexVector;
202 const float* iBufferPtr = iBuffer;
203 const float* qBufferPtr = qBuffer;
206 for(number = 0; number < num_points; number++){
/* Each iteration consumes one float from each input and emits two floats. */
207 *complexVectorPtr++ = *iBufferPtr++;
208 *complexVectorPtr++ = *qBufferPtr++;
217 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
218 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
220 #include <inttypes.h>
224 #include <immintrin.h>
/*
 * Unaligned AVX path: interleaves the floats read from iBuffer and qBuffer
 * into complexVector as (i, q) pairs. A vector loop produces 8 pairs per
 * iteration; a scalar loop produces the remaining num_points % 8 pairs.
 *
 * qBuffer    - source of the second float of every output pair.
 * num_points - number of (i, q) pairs to produce.
 *
 * NOTE(review): the start of the signature (function name and the
 * complexVector / iBuffer parameters) and the braces are not visible in this
 * excerpt -- confirm against the full file before relying on this comment.
 */
228 const float* qBuffer,
unsigned int num_points)
230 unsigned int number = 0;
/* View the output as a flat float array laid out i0, q0, i1, q1, ... */
231 float* complexVectorPtr = (
float*)complexVector;
232 const float* iBufferPtr = iBuffer;
233 const float* qBufferPtr = qBuffer;
/* Count of full 8-float groups handled by the vector loop. */
235 const uint64_t eighthPoints = num_points / 8;
237 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
238 for(;number < eighthPoints; number++){
/* loadu / storeu are the unaligned variants: no alignment is required. */
239 iValue = _mm256_loadu_ps(iBufferPtr);
240 qValue = _mm256_loadu_ps(qBufferPtr);
/* unpacklo works per 128-bit lane, giving: i0 q0 i1 q1 | i4 q4 i5 q5 */
243 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
/* unpackhi works per 128-bit lane, giving: i2 q2 i3 q3 | i6 q6 i7 q7 */
245 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
/* 0x20 joins the low lanes of both inputs: i0 q0 i1 q1 i2 q2 i3 q3 */
247 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
248 _mm256_storeu_ps(complexVectorPtr, cplxValue);
249 complexVectorPtr += 8;
/* 0x31 joins the high lanes of both inputs: i4 q4 i5 q5 i6 q6 i7 q7 */
251 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
252 _mm256_storeu_ps(complexVectorPtr, cplxValue);
253 complexVectorPtr += 8;
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible in the vector
 * loop above. The embedded numbering jumps from 253 to 259, so the advance
 * (and the closing brace) is presumably in lines not shown -- confirm.
 */
/* Scalar tail: resume at the first point the vector loop did not cover. */
259 number = eighthPoints * 8;
260 for(; number < num_points; number++){
261 *complexVectorPtr++ = *iBufferPtr++;
262 *complexVectorPtr++ = *qBufferPtr++;