47#include "NE10_types.h"
48#include "NE10_macros.h"
50#include "NE10_fft_generic_float32.h"
/*
 * Radix-2 butterfly fragment; presumably the body of
 * ne10_radix_2_butterfly_float32_c, which the mixed-radix driver in this file
 * calls (TODO confirm, the function head is not part of this excerpt).
 *
 * NOTE(review): every line of this listing carries its original line number as
 * a prefix, and some original lines (function head, braces, local
 * declarations, several guards) are absent. Guards that cannot be seen are
 * hedged below rather than stated as fact.
 *
 * Visible parameters:
 *   fstride        - iteration count of the outer loop
 *   out_step       - iteration count of the inner loop; also the stride used
 *                    for the twiddle read and the Fout writes
 *   nfft           - divided by 2 to get the input stride; also used for the
 *                    1/nfft scale factor
 *   is_first_stage - gates the scaling together with is_scaled
 *   is_inverse     - not referenced on any visible line; presumably guards the
 *                    two sign-flip steps (TODO confirm)
 *   is_scaled      - gates the scaling together with is_first_stage
 */
58 const ne10_int32_t fstride,
59 const ne10_int32_t out_step,
60 const ne10_int32_t nfft,
61 const ne10_int32_t is_first_stage,
62 const ne10_int32_t is_inverse,
63 const ne10_int32_t is_scaled)
/* Distance in Fin between the two inputs of one butterfly. */
68 const ne10_int32_t in_step = nfft / 2;
/* fstride outer passes, each processing out_step butterflies. */
72 for (f_count = fstride; f_count > 0; f_count--)
74 for (m_count = out_step; m_count > 0; m_count--)
/* Gather the two inputs, in_step elements apart. */
76 scratch_in[0] = Fin[0 * in_step];
77 scratch_in[1] = Fin[1 * in_step];
/*
 * Negate the imaginary part of both inputs.
 * NOTE(review): the condition guarding this step is not visible here;
 * presumably it only runs when is_inverse is set.
 */
81 scratch_in[0].i = -scratch_in[0].i;
82 scratch_in[1].i = -scratch_in[1].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): when is_scaled and is_first_stage are both non-zero, multiply
 * every input component by 1/nfft.
 */
85#ifdef NE10_DSP_CFFT_SCALING
86 if (is_scaled && is_first_stage)
88 const ne10_float32_t one_by_nfft = 1.0 / nfft;
90 scratch_in[0].r *= one_by_nfft;
91 scratch_in[0].i *= one_by_nfft;
92 scratch_in[1].r *= one_by_nfft;
93 scratch_in[1].i *= one_by_nfft;
/*
 * Read one twiddle, apply it through FFT2_MUL_TW into scratch, then copy the
 * result back into scratch_in.
 * NOTE(review): the first-stage call in this file passes NULL for twiddles, so
 * this step is presumably skipped when is_first_stage is set; the guard is not
 * visible in this excerpt.
 */
102 scratch_tw[0] = twiddles[0 * out_step];
104 FFT2_MUL_TW (scratch, scratch_in, scratch_tw);
106 scratch_in[0] = scratch[0];
107 scratch_in[1] = scratch[1];
/* Butterfly core: FFT2_FCU reads scratch_in and fills scratch_out. */
110 FFT2_FCU (scratch_out, scratch_in);
/*
 * Negate the imaginary part of both outputs (guard not visible; presumably
 * the counterpart of the input sign flip above).
 */
114 scratch_out[0].i = -scratch_out[0].i;
115 scratch_out[1].i = -scratch_out[1].i;
/* Scatter the two results, out_step elements apart. */
118 Fout[0 * out_step] = scratch_out[0];
119 Fout[1 * out_step] = scratch_out[1];
/*
 * Move twiddles back by out_step and Fout forward by (2 - 1) * out_step.
 * NOTE(review): the per-butterfly pointer increments these pair with, and the
 * condition around them, are not part of this excerpt.
 */
135 twiddles -= out_step;
136 Fout += (2 - 1) * out_step;
/*
 * Radix-4 butterfly fragment; presumably the body of
 * ne10_radix_4_butterfly_float32_c, which the mixed-radix driver in this file
 * calls (TODO confirm, the function head is not part of this excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, scratch declarations, several
 * guards) are absent; unseen guards are hedged below.
 *
 * Visible parameters:
 *   fstride        - iteration count of the outer loop
 *   out_step       - iteration count of the inner loop; also the stride used
 *                    for the twiddle reads and the Fout writes
 *   nfft           - divided by 4 to get the input stride; also used for the
 *                    1/nfft scale factor
 *   is_first_stage - gates the scaling together with is_scaled
 *   is_inverse     - not referenced on any visible line; presumably guards the
 *                    two sign-flip steps (TODO confirm)
 *   is_scaled      - gates the scaling together with is_first_stage
 */
144 const ne10_int32_t fstride,
145 const ne10_int32_t out_step,
146 const ne10_int32_t nfft,
147 const ne10_int32_t is_first_stage,
148 const ne10_int32_t is_inverse,
149 const ne10_int32_t is_scaled)
/* Distance in Fin between consecutive inputs of one butterfly. */
154 const ne10_int32_t in_step = nfft / 4;
155 ne10_int32_t f_count;
156 ne10_int32_t m_count;
/* fstride outer passes, each processing out_step butterflies. */
158 for (f_count = fstride; f_count > 0; f_count--)
160 for (m_count = out_step; m_count > 0; m_count--)
/* Gather the four inputs, in_step elements apart. */
162 scratch_in[0] = Fin[0 * in_step];
163 scratch_in[1] = Fin[1 * in_step];
164 scratch_in[2] = Fin[2 * in_step];
165 scratch_in[3] = Fin[3 * in_step];
/*
 * Negate the imaginary part of every input.
 * NOTE(review): the guarding condition is not visible here; presumably this
 * only runs when is_inverse is set.
 */
169 scratch_in[0].i = -scratch_in[0].i;
170 scratch_in[1].i = -scratch_in[1].i;
171 scratch_in[2].i = -scratch_in[2].i;
172 scratch_in[3].i = -scratch_in[3].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): when is_scaled and is_first_stage are both non-zero, multiply
 * every input component by 1/nfft.
 */
175#ifdef NE10_DSP_CFFT_SCALING
176 if (is_scaled && is_first_stage)
178 const ne10_float32_t one_by_nfft = 1.0 / nfft;
180 scratch_in[0].r *= one_by_nfft;
181 scratch_in[0].i *= one_by_nfft;
182 scratch_in[1].r *= one_by_nfft;
183 scratch_in[1].i *= one_by_nfft;
184 scratch_in[2].r *= one_by_nfft;
185 scratch_in[2].i *= one_by_nfft;
186 scratch_in[3].r *= one_by_nfft;
187 scratch_in[3].i *= one_by_nfft;
/*
 * Read three twiddles (out_step apart), apply them through FFT4_MUL_TW into
 * scratch, then copy the result back into scratch_in.
 * NOTE(review): the first-stage call in this file passes NULL for twiddles, so
 * this step is presumably skipped when is_first_stage is set; the guard is not
 * visible in this excerpt.
 */
196 scratch_tw[0] = twiddles[0 * out_step];
197 scratch_tw[1] = twiddles[1 * out_step];
198 scratch_tw[2] = twiddles[2 * out_step];
200 FFT4_MUL_TW (scratch, scratch_in, scratch_tw);
202 scratch_in[0] = scratch[0];
203 scratch_in[1] = scratch[1];
204 scratch_in[2] = scratch[2];
205 scratch_in[3] = scratch[3];
/* Butterfly core: FFT4_FCU reads scratch_in and fills scratch_out. */
208 FFT4_FCU (scratch_out, scratch_in);
/*
 * Negate the imaginary part of every output (guard not visible; presumably
 * the counterpart of the input sign flip above).
 */
212 scratch_out[0].i = -scratch_out[0].i;
213 scratch_out[1].i = -scratch_out[1].i;
214 scratch_out[2].i = -scratch_out[2].i;
215 scratch_out[3].i = -scratch_out[3].i;
/* Scatter the four results, out_step elements apart. */
218 Fout[0 * out_step] = scratch_out[0];
219 Fout[1 * out_step] = scratch_out[1];
220 Fout[2 * out_step] = scratch_out[2];
221 Fout[3 * out_step] = scratch_out[3];
/*
 * Move twiddles back by out_step and Fout forward by (4 - 1) * out_step.
 * NOTE(review): the per-butterfly pointer increments these pair with, and the
 * condition around them, are not part of this excerpt.
 */
237 twiddles -= out_step;
238 Fout += (4 - 1) * out_step;
/*
 * Radix-8 butterfly fragment; presumably the body of
 * ne10_radix_8_butterfly_float32_c, which the mixed-radix driver in this file
 * calls for its first stage (TODO confirm, the function head is not part of
 * this excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, scratch declarations, several
 * guards) are absent; unseen guards are hedged below.
 *
 * Visible parameters:
 *   fstride        - iteration count of the outer loop
 *   out_step       - iteration count of the inner loop; also the stride of
 *                    the Fout writes
 *   nfft           - divided by 8 to get the input stride; also used for the
 *                    1/nfft scale factor
 *   is_first_stage - asserted to be 1
 *   is_inverse     - not referenced on any visible line; presumably guards the
 *                    two sign-flip steps (TODO confirm)
 *   is_scaled      - not referenced on any visible line; presumably guards the
 *                    scaling (TODO confirm)
 * No twiddle multiplication appears in the visible lines of this fragment.
 */
246 const ne10_int32_t fstride,
247 const ne10_int32_t out_step,
248 const ne10_int32_t nfft,
249 const ne10_int32_t is_first_stage,
250 const ne10_int32_t is_inverse,
251 const ne10_int32_t is_scaled)
/* This butterfly is only valid as a first stage. */
253 assert (is_first_stage == 1);
/* Distance in Fin between consecutive inputs of one butterfly. */
258 const ne10_int32_t in_step = nfft / 8;
259 ne10_int32_t f_count;
260 ne10_int32_t m_count;
/* fstride outer passes, each processing out_step butterflies. */
262 for (f_count = fstride; f_count > 0; f_count--)
264 for (m_count = out_step; m_count > 0; m_count--)
/* Gather the eight inputs, in_step elements apart. */
266 scratch_in[0] = Fin[0 * in_step];
267 scratch_in[1] = Fin[1 * in_step];
268 scratch_in[2] = Fin[2 * in_step];
269 scratch_in[3] = Fin[3 * in_step];
270 scratch_in[4] = Fin[4 * in_step];
271 scratch_in[5] = Fin[5 * in_step];
272 scratch_in[6] = Fin[6 * in_step];
273 scratch_in[7] = Fin[7 * in_step];
/*
 * Negate the imaginary part of every input.
 * NOTE(review): the guarding condition is not visible here; presumably this
 * only runs when is_inverse is set.
 */
277 scratch_in[0].i = -scratch_in[0].i;
278 scratch_in[1].i = -scratch_in[1].i;
279 scratch_in[2].i = -scratch_in[2].i;
280 scratch_in[3].i = -scratch_in[3].i;
281 scratch_in[4].i = -scratch_in[4].i;
282 scratch_in[5].i = -scratch_in[5].i;
283 scratch_in[6].i = -scratch_in[6].i;
284 scratch_in[7].i = -scratch_in[7].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): multiply every input component by 1/nfft.
 * NOTE(review): unlike the other radix fragments, no run-time condition is
 * visible in front of this scaling; confirm against the full file.
 */
287#ifdef NE10_DSP_CFFT_SCALING
290 const ne10_float32_t one_by_nfft = 1.0 / nfft;
292 scratch_in[0].r *= one_by_nfft;
293 scratch_in[0].i *= one_by_nfft;
294 scratch_in[1].r *= one_by_nfft;
295 scratch_in[1].i *= one_by_nfft;
296 scratch_in[2].r *= one_by_nfft;
297 scratch_in[2].i *= one_by_nfft;
298 scratch_in[3].r *= one_by_nfft;
299 scratch_in[3].i *= one_by_nfft;
300 scratch_in[4].r *= one_by_nfft;
301 scratch_in[4].i *= one_by_nfft;
302 scratch_in[5].r *= one_by_nfft;
303 scratch_in[5].i *= one_by_nfft;
304 scratch_in[6].r *= one_by_nfft;
305 scratch_in[6].i *= one_by_nfft;
306 scratch_in[7].r *= one_by_nfft;
307 scratch_in[7].i *= one_by_nfft;
/* Butterfly core: FFT8_FCU reads scratch_in and fills scratch_out. */
311 FFT8_FCU (scratch_out, scratch_in);
/*
 * Negate the imaginary part of every output (guard not visible; presumably
 * the counterpart of the input sign flip above).
 */
315 scratch_out[0].i = -scratch_out[0].i;
316 scratch_out[1].i = -scratch_out[1].i;
317 scratch_out[2].i = -scratch_out[2].i;
318 scratch_out[3].i = -scratch_out[3].i;
319 scratch_out[4].i = -scratch_out[4].i;
320 scratch_out[5].i = -scratch_out[5].i;
321 scratch_out[6].i = -scratch_out[6].i;
322 scratch_out[7].i = -scratch_out[7].i;
/* Scatter the eight results, out_step elements apart. */
325 Fout[0*out_step] = scratch_out[0];
326 Fout[1*out_step] = scratch_out[1];
327 Fout[2*out_step] = scratch_out[2];
328 Fout[3*out_step] = scratch_out[3];
329 Fout[4*out_step] = scratch_out[4];
330 Fout[5*out_step] = scratch_out[5];
331 Fout[6*out_step] = scratch_out[6];
332 Fout[7*out_step] = scratch_out[7];
/*
 * Radix-3 butterfly fragment; presumably the body of
 * ne10_radix_3_butterfly_float32_c, which the mixed-radix driver in this file
 * calls (TODO confirm, the function head is not part of this excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, scratch declarations, several
 * guards) are absent; unseen guards are hedged below.
 *
 * Visible parameters:
 *   fstride        - iteration count of the outer loop
 *   out_step       - iteration count of the inner loop; also the stride used
 *                    for the twiddle reads and the Fout writes
 *   nfft           - divided by 3 to get the input stride; also used for the
 *                    1/nfft scale factor
 *   is_first_stage - gates the scaling together with is_scaled
 *   is_inverse     - not referenced on any visible line; presumably guards the
 *                    two sign-flip steps (TODO confirm)
 *   is_scaled      - gates the scaling together with is_first_stage
 */
343 const ne10_int32_t fstride,
344 const ne10_int32_t out_step,
345 const ne10_int32_t nfft,
346 const ne10_int32_t is_first_stage,
347 const ne10_int32_t is_inverse,
348 const ne10_int32_t is_scaled)
/* Distance in Fin between consecutive inputs of one butterfly. */
353 const ne10_int32_t in_step = nfft / 3;
354 ne10_int32_t f_count;
355 ne10_int32_t m_count;
/* fstride outer passes, each processing out_step butterflies. */
357 for (f_count = fstride; f_count > 0; f_count--)
359 for (m_count = out_step; m_count > 0; m_count--)
/* Gather the three inputs, in_step elements apart. */
361 scratch_in[0] = Fin[0 * in_step];
362 scratch_in[1] = Fin[1 * in_step];
363 scratch_in[2] = Fin[2 * in_step];
/*
 * Negate the imaginary part of every input.
 * NOTE(review): the guarding condition is not visible here; presumably this
 * only runs when is_inverse is set.
 */
367 scratch_in[0].i = -scratch_in[0].i;
368 scratch_in[1].i = -scratch_in[1].i;
369 scratch_in[2].i = -scratch_in[2].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): when is_scaled and is_first_stage are both non-zero, multiply
 * every input component by 1/nfft.
 */
372#ifdef NE10_DSP_CFFT_SCALING
373 if (is_scaled && is_first_stage)
375 const ne10_float32_t one_by_nfft = 1.0 / nfft;
377 scratch_in[0].r *= one_by_nfft;
378 scratch_in[0].i *= one_by_nfft;
379 scratch_in[1].r *= one_by_nfft;
380 scratch_in[1].i *= one_by_nfft;
381 scratch_in[2].r *= one_by_nfft;
382 scratch_in[2].i *= one_by_nfft;
/*
 * Read two twiddles (out_step apart), apply them through FFT3_MUL_TW into
 * scratch, then copy the result back into scratch_in.
 * NOTE(review): the first-stage call in this file passes NULL for twiddles, so
 * this step is presumably skipped when is_first_stage is set; the guard is not
 * visible in this excerpt.
 */
391 scratch_tw[0] = twiddles[0 * out_step];
392 scratch_tw[1] = twiddles[1 * out_step];
394 FFT3_MUL_TW (scratch, scratch_in, scratch_tw);
396 scratch_in[0] = scratch[0];
397 scratch_in[1] = scratch[1];
398 scratch_in[2] = scratch[2];
/* Butterfly core: FFT3_FCU reads scratch_in and fills scratch_out. */
401 FFT3_FCU (scratch_out, scratch_in);
/*
 * Negate the imaginary part of every output (guard not visible; presumably
 * the counterpart of the input sign flip above).
 */
405 scratch_out[0].i = -scratch_out[0].i;
406 scratch_out[1].i = -scratch_out[1].i;
407 scratch_out[2].i = -scratch_out[2].i;
/* Scatter the three results, out_step elements apart. */
410 Fout[0 * out_step] = scratch_out[0];
411 Fout[1 * out_step] = scratch_out[1];
412 Fout[2 * out_step] = scratch_out[2];
/*
 * Move twiddles back by out_step and Fout forward by (3 - 1) * out_step.
 * NOTE(review): the per-butterfly pointer increments these pair with, and the
 * condition around them, are not part of this excerpt.
 */
428 twiddles -= out_step;
429 Fout += (3 - 1) * out_step;
/*
 * Radix-5 butterfly fragment; presumably the body of
 * ne10_radix_5_butterfly_float32_c, which the mixed-radix driver in this file
 * calls (TODO confirm, the function head is not part of this excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, scratch declarations, several
 * guards) are absent; unseen guards are hedged below.
 *
 * Visible parameters:
 *   fstride        - iteration count of the outer loop
 *   out_step       - iteration count of the inner loop; also the stride used
 *                    for the twiddle reads and the Fout writes
 *   nfft           - divided by 5 to get the input stride; also used for the
 *                    1/nfft scale factor
 *   is_first_stage - gates the scaling together with is_scaled
 *   is_inverse     - not referenced on any visible line; presumably guards the
 *                    two sign-flip steps (TODO confirm)
 *   is_scaled      - gates the scaling together with is_first_stage
 */
437 const ne10_int32_t fstride,
438 const ne10_int32_t out_step,
439 const ne10_int32_t nfft,
440 const ne10_int32_t is_first_stage,
441 const ne10_int32_t is_inverse,
442 const ne10_int32_t is_scaled)
/* Distance in Fin between consecutive inputs of one butterfly. */
447 const ne10_int32_t in_step = nfft / 5;
448 ne10_int32_t f_count;
449 ne10_int32_t m_count;
/* fstride outer passes, each processing out_step butterflies. */
451 for (f_count = fstride; f_count > 0; f_count--)
453 for (m_count = out_step; m_count > 0; m_count--)
/* Gather the five inputs, in_step elements apart. */
455 scratch_in[0] = Fin[0 * in_step];
456 scratch_in[1] = Fin[1 * in_step];
457 scratch_in[2] = Fin[2 * in_step];
458 scratch_in[3] = Fin[3 * in_step];
459 scratch_in[4] = Fin[4 * in_step];
/*
 * Negate the imaginary part of every input.
 * NOTE(review): the guarding condition is not visible here; presumably this
 * only runs when is_inverse is set.
 */
463 scratch_in[0].i = -scratch_in[0].i;
464 scratch_in[1].i = -scratch_in[1].i;
465 scratch_in[2].i = -scratch_in[2].i;
466 scratch_in[3].i = -scratch_in[3].i;
467 scratch_in[4].i = -scratch_in[4].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): when is_scaled and is_first_stage are both non-zero, multiply
 * every input component by 1/nfft.
 */
470#ifdef NE10_DSP_CFFT_SCALING
471 if (is_scaled && is_first_stage)
473 const ne10_float32_t one_by_nfft = 1.0 / nfft;
475 scratch_in[0].r *= one_by_nfft;
476 scratch_in[0].i *= one_by_nfft;
477 scratch_in[1].r *= one_by_nfft;
478 scratch_in[1].i *= one_by_nfft;
479 scratch_in[2].r *= one_by_nfft;
480 scratch_in[2].i *= one_by_nfft;
481 scratch_in[3].r *= one_by_nfft;
482 scratch_in[3].i *= one_by_nfft;
483 scratch_in[4].r *= one_by_nfft;
484 scratch_in[4].i *= one_by_nfft;
/*
 * Read four twiddles (out_step apart), apply them through FFT5_MUL_TW into
 * scratch, then copy the result back into scratch_in.
 * NOTE(review): the first-stage call in this file passes NULL for twiddles, so
 * this step is presumably skipped when is_first_stage is set; the guard is not
 * visible in this excerpt.
 */
493 scratch_tw[0] = twiddles[0 * out_step];
494 scratch_tw[1] = twiddles[1 * out_step];
495 scratch_tw[2] = twiddles[2 * out_step];
496 scratch_tw[3] = twiddles[3 * out_step];
498 FFT5_MUL_TW (scratch, scratch_in, scratch_tw);
500 scratch_in[0] = scratch[0];
501 scratch_in[1] = scratch[1];
502 scratch_in[2] = scratch[2];
503 scratch_in[3] = scratch[3];
504 scratch_in[4] = scratch[4];
/* Butterfly core: FFT5_FCU reads scratch_in and fills scratch_out. */
507 FFT5_FCU (scratch_out, scratch_in);
/*
 * Negate the imaginary part of every output (guard not visible; presumably
 * the counterpart of the input sign flip above).
 */
511 scratch_out[0].i = -scratch_out[0].i;
512 scratch_out[1].i = -scratch_out[1].i;
513 scratch_out[2].i = -scratch_out[2].i;
514 scratch_out[3].i = -scratch_out[3].i;
515 scratch_out[4].i = -scratch_out[4].i;
/* Scatter the five results, out_step elements apart. */
518 Fout[0 * out_step] = scratch_out[0];
519 Fout[1 * out_step] = scratch_out[1];
520 Fout[2 * out_step] = scratch_out[2];
521 Fout[3 * out_step] = scratch_out[3];
522 Fout[4 * out_step] = scratch_out[4];
/*
 * Move twiddles back by out_step and Fout forward by (5 - 1) * out_step.
 * NOTE(review): the per-butterfly pointer increments these pair with, and the
 * condition around them, are not part of this excerpt.
 */
538 twiddles -= out_step;
539 Fout += (5 - 1) * out_step;
/*
 * Generic-radix butterfly fragment; presumably the body of
 * ne10_radix_generic_butterfly_float32_c, which the mixed-radix driver in this
 * file calls for a first stage with twiddles, radix, fstride (as in_step) and
 * an out_step of 1 (TODO confirm, the function head is not part of this
 * excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, scratch allocation, the twidx
 * update, several guards) are absent; unseen parts are hedged below.
 *
 * Visible parameters:
 *   radix      - number of inputs and outputs per butterfly
 *   in_step    - stride between inputs in Fin; also the outer loop count
 *   out_step   - stride between outputs in Fout
 *   is_inverse - not referenced on any visible line; presumably guards the
 *                sign flips (TODO confirm)
 *   is_scaled  - not referenced on any visible line; presumably guards the
 *                scaling (TODO confirm)
 */
547 const ne10_int32_t radix,
548 const ne10_int32_t in_step,
549 const ne10_int32_t out_step,
550 const ne10_int32_t is_inverse,
551 const ne10_int32_t is_scaled)
/* One butterfly per outer iteration, in_step iterations in total. */
554 ne10_int32_t f_count = in_step;
561 for (; f_count > 0; f_count--)
/* Load phase: copy radix inputs, in_step apart, into scratch. */
564 for (q1 = 0; q1 < radix; q1++)
566 scratch[q1] = Fin[in_step * q1];
/* Negate the imaginary part of the loaded value (guard not visible). */
569 scratch[q1].i = -scratch[q1].i;
/*
 * Compiled only when NE10_DSP_CFFT_SCALING is defined (the matching #endif is
 * not shown): scale the loaded value by 1 / (radix * in_step). The variable
 * name suggests that product is the transform length, which matches the driver
 * computing nfft = fstride * radix and passing fstride as in_step.
 * NOTE(review): the run-time condition around this scaling is not visible.
 */
570#ifdef NE10_DSP_CFFT_SCALING
573 const ne10_float32_t one_by_nfft = 1.0 / (radix * in_step);
574 scratch[q1].r *= one_by_nfft;
575 scratch[q1].i *= one_by_nfft;
/*
 * Compute phase: each output q1 starts as scratch[0] and then accumulates
 * scratch[q] combined with twiddles[twidx] for q = 1 .. radix - 1, using
 * NE10_CPX_MUL_F32 into tmp followed by NE10_CPX_ADDTO (presumably a complex
 * multiply and a complex add-to, per the macro names).
 * NOTE(review): twidx is initialised to 0 here, but the statements that
 * advance it inside the q loop are not part of this excerpt.
 */
582 for (q1 = 0; q1 < radix; q1++)
584 ne10_int32_t twidx = 0;
585 Fout[q1 * out_step] = scratch[0];
586 for (q = 1; q < radix; q++)
593 NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
594 NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
/* Negate the imaginary part of the finished output (guard not visible). */
598 Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
/*
 * Mixed-radix driver fragment; presumably the body of
 * ne10_mixed_radix_generic_butterfly_float32_impl_c, the function the wrapper
 * fragments later in this file call (TODO confirm, the function head is not
 * part of this excerpt).
 *
 * NOTE(review): lines carry their original line number as a prefix and some
 * original lines (function head, braces, the radix dispatch, loop bookkeeping,
 * the tails of several calls) are absent; unseen parts are hedged below.
 *
 * Visible parameters:
 *   factors    - integer table: factors[0] is read as the stage count,
 *                factors[1] as the first-stage fstride, and
 *                factors[2 * stage_count] as a stage radix
 *   is_inverse - forwarded to every butterfly call
 *   is_scaled  - forwarded to the first-stage butterfly calls; the later-stage
 *                calls are cut off before their last argument
 */
611 const ne10_int32_t *factors,
614 const ne10_int32_t is_inverse,
615 const ne10_int32_t is_scaled)
617 ne10_int32_t fstride, mstride, radix;
618 ne10_int32_t stage_count;
/* Decode the stage count and the first-stage stride from the factor table. */
622 stage_count = factors[0];
623 fstride = factors[1];
/* First-stage radix, and the transform length derived from it. */
625 radix = factors[stage_count << 1];
626 nfft = fstride * radix;
/*
 * With an even number of stages, swap buffer and Fout before starting.
 * NOTE(review): presumably this makes the last stage write into the caller
 * output, given that the loop below swaps once per later stage; confirm.
 */
628 if (stage_count % 2 == 0)
630 ne10_swap_ptr (buffer, Fout);
/*
 * First stage: read Fin, write Fout, with out_step 1 and is_first_stage 1.
 * The fixed-radix butterflies get NULL for twiddles; the generic one gets
 * twiddles, radix and fstride.
 * NOTE(review): the dispatch that selects exactly one of these calls by radix
 * is not part of this excerpt.
 */
637 ne10_radix_2_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
638 is_inverse, is_scaled);
641 ne10_radix_4_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
642 is_inverse, is_scaled);
645 ne10_radix_3_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
646 is_inverse, is_scaled);
649 ne10_radix_5_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
650 is_inverse, is_scaled);
653 ne10_radix_8_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
654 is_inverse, is_scaled);
656 ne10_radix_generic_butterfly_float32_c (Fout, Fin, twiddles, radix,
657 fstride, 1, is_inverse, is_scaled);
/*
 * Remaining stages: swap buffer and Fout each time so the previous output
 * becomes the input, then read the radix for this stage.
 * NOTE(review): the updates of stage_count, fstride and mstride are not part
 * of this excerpt.
 */
673 while (stage_count > 0)
675 ne10_swap_ptr (buffer, Fout);
679 radix = factors[stage_count << 1];
/* Later stages only support radix 2, 3, 4 and 5. */
680 assert ((radix > 1) && (radix < 6));
/*
 * Later-stage butterflies: read buffer, write Fout, use twiddles, with
 * out_step mstride and is_first_stage 0. Each call is cut off after
 * is_inverse in this excerpt, and the dispatch selecting one of them by radix
 * is not shown.
 */
686 ne10_radix_2_butterfly_float32_c (Fout, buffer, twiddles, fstride,
687 mstride, nfft, 0, is_inverse,
691 ne10_radix_3_butterfly_float32_c (Fout, buffer, twiddles, fstride,
692 mstride, nfft, 0, is_inverse,
696 ne10_radix_4_butterfly_float32_c (Fout, buffer, twiddles, fstride,
697 mstride, nfft, 0, is_inverse,
701 ne10_radix_5_butterfly_float32_c (Fout, buffer, twiddles, fstride,
702 mstride, nfft, 0, is_inverse,
/* Step past the mstride * (radix - 1) twiddles consumed by this stage. */
707 twiddles += mstride * (radix - 1);
/*
 * Wrapper fragment around ne10_mixed_radix_generic_butterfly_float32_impl_c.
 * NOTE(review): the name of this wrapper, its remaining parameters, the
 * condition choosing between the two calls, and the trailing arguments of
 * each call are not part of this excerpt. Presumably the choice depends on
 * is_scaled, the only flag visible in the signature; confirm against the full
 * file.
 *
 * Visible parameters:
 *   factors   - forwarded unchanged to the implementation
 *   is_scaled - not referenced on any visible line of the body
 */
715 const ne10_int32_t *factors,
718 const ne10_int32_t is_scaled)
/* Two alternative calls; both forward Fout, Fin and factors. */
722 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
728 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
/*
 * Second wrapper fragment around
 * ne10_mixed_radix_generic_butterfly_float32_impl_c.
 * NOTE(review): the name of this wrapper, its remaining parameters, the
 * condition choosing between the two calls, and the trailing arguments of
 * each call are not part of this excerpt, so what distinguishes it from the
 * preceding wrapper cannot be told from the visible lines (presumably the
 * is_inverse value it passes on; confirm against the full file).
 *
 * Visible parameters:
 *   factors   - forwarded unchanged to the implementation
 *   is_scaled - not referenced on any visible line of the body
 */
736 const ne10_int32_t *factors,
739 const ne10_int32_t is_scaled)
/* Two alternative calls; both forward Fout, Fin and factors. */
743 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
749 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,