Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_fft_float32.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test_suite_fft_float32.c
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35#include <string.h>
36
37#include "NE10_dsp.h"
38#include "NE10_macros.h"
39#include "seatest.h"
40#include "unit_test_common.h"
41
42
43/* ----------------------------------------------------------------------
44** Global defines
45** ------------------------------------------------------------------- */
46
47/* Max FFT Length and double buffer for real and imag */
48#define TEST_LENGTH_SAMPLES (32768)
49#define MIN_LENGTH_SAMPLES_CPX (4)
50#define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2)
51
52#define TEST_COUNT 10000000
53
54/* ----------------------------------------------------------------------
55** Test input data for F32
56** Generated by the MATLAB rand() function
57** ------------------------------------------------------------------- */
58
59static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES * 2];
60
61/* ----------------------------------------------------------------------
62** Defines each of the tests performed
63** ------------------------------------------------------------------- */
64
65//input and output
66static ne10_float32_t * guarded_in_c = NULL;
67static ne10_float32_t * guarded_in_neon = NULL;
68static ne10_float32_t * in_c = NULL;
69static ne10_float32_t * in_neon = NULL;
70
71static ne10_float32_t * guarded_out_c = NULL;
72static ne10_float32_t * guarded_out_neon = NULL;
73static ne10_float32_t * out_c = NULL;
74static ne10_float32_t * out_neon = NULL;
75
76static ne10_float32_t snr = 0.0f;
77
78static ne10_int64_t time_c = 0;
79static ne10_int64_t time_neon = 0;
80static ne10_float32_t time_speedup = 0.0f;
81static ne10_float32_t time_savings = 0.0f;
82
83static ne10_fft_cfg_float32_t cfg_c;
84static ne10_fft_cfg_float32_t cfg_neon;
85
86static ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize);
87
88void test_fft_c2c_1d_float32_conformance()
89{
90 ne10_int32_t fftSize = 0;
91 ne10_int32_t flag_result = NE10_OK;
92
93 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
94
95 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
96 {
97 fprintf (stdout, "FFT size %d\n", fftSize);
98 flag_result = test_c2c_alloc (fftSize);
99 if (flag_result == NE10_ERR)
100 {
101 return;
102 }
103
104 /* FFT test */
105 memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
106 memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
107
108 GUARD_ARRAY (out_c, fftSize * 2);
109 GUARD_ARRAY (out_neon, fftSize * 2);
110
112 ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
113
114 CHECK_ARRAY_GUARD (out_c, fftSize * 2);
115 CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
116
117 //conformance test
118 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
119 assert_false ( (snr < SNR_THRESHOLD));
120
121 /* IFFT test */
122 memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
123 memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
124
125 GUARD_ARRAY (out_c, fftSize * 2);
126 GUARD_ARRAY (out_neon, fftSize * 2);
127
129 ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
130
131 CHECK_ARRAY_GUARD (out_c, fftSize * 2);
132 CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
133
134 //conformance test
135 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
136 assert_false ( (snr < SNR_THRESHOLD));
137
138 NE10_FREE (cfg_c);
139 NE10_FREE (cfg_neon);
140 }
141}
142
143void test_fft_c2c_1d_float32_performance()
144{
145 ne10_int32_t i = 0;
146 ne10_int32_t fftSize = 0;
147 ne10_int32_t flag_result = NE10_OK;
148 ne10_int32_t test_loop = 0;
149
150 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
151 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
152
153 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
154 {
155 fprintf (stdout, "FFT size %d\n", fftSize);
156
157 /* FFT test */
158 memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
159 memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
160 flag_result = test_c2c_alloc (fftSize);
161 if (flag_result == NE10_ERR)
162 {
163 return;
164 }
165
166 test_loop = TEST_COUNT / fftSize;
167
168 GET_TIME
169 (
170 time_c,
171 {
172 for (i = 0; i < test_loop; i++)
174 }
175 );
176 GET_TIME
177 (
178 time_neon,
179 {
180 for (i = 0; i < test_loop; i++)
181 ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
182 }
183 );
184
185 time_speedup = (ne10_float32_t) time_c / time_neon;
186 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
187 ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
188
189 /* IFFT test */
190 memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_float32_t));
191 memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_float32_t));
192
193 GET_TIME
194 (
195 time_c,
196 {
197 for (i = 0; i < test_loop; i++)
199 }
200 );
201 GET_TIME
202 (
203 time_neon,
204 {
205 for (i = 0; i < test_loop; i++)
206 ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
207 }
208 );
209
210 time_speedup = (ne10_float32_t) time_c / time_neon;
211 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
212 ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
213
214 NE10_FREE (cfg_c);
215 NE10_FREE (cfg_neon);
216 }
217}
218
219void test_fft_r2c_1d_float32_conformance()
220{
221
222 ne10_int32_t i = 0;
223 ne10_int32_t fftSize = 0;
225
226 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
227
228 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
229 {
230 fprintf (stdout, "FFT size %d\n", fftSize);
231
232 /* FFT test */
233 memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
234 memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
235 cfg = ne10_fft_alloc_r2c_float32 (fftSize);
236 if (cfg == NULL)
237 {
238 fprintf (stdout, "======ERROR, FFT alloc fails\n");
239 return;
240 }
241
242 GUARD_ARRAY (out_c, (fftSize / 2 + 1) * 2);
243 GUARD_ARRAY (out_neon, (fftSize / 2 + 1) * 2);
244
246 ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
247
248 CHECK_ARRAY_GUARD (out_c, (fftSize / 2 + 1) * 2);
249 CHECK_ARRAY_GUARD (out_neon, (fftSize / 2 + 1) * 2);
250
251 //conformance test
252 snr = CAL_SNR_FLOAT32 (out_c, out_neon, (fftSize / 2 + 1) * 2);
253 assert_false ( (snr < SNR_THRESHOLD));
254
255 /* IFFT test */
256 for (i = 1; i < (fftSize / 2); i++)
257 {
258 in_c[2 * i] = testInput_f32[2 * i];
259 in_c[2 * i + 1] = testInput_f32[2 * i + 1];
260 in_c[2 * (fftSize - i)] = in_c[2 * i];
261 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
262 }
263 in_c[0] = testInput_f32[0];
264 in_c[1] = 0;
265 in_c[fftSize] = testInput_f32[1];
266 in_c[fftSize + 1] = 0;
267 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
268
269 GUARD_ARRAY (out_c, fftSize);
270 GUARD_ARRAY (out_neon, fftSize);
271
273 ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
274
275 CHECK_ARRAY_GUARD (out_c, fftSize);
276 CHECK_ARRAY_GUARD (out_neon, fftSize);
277
278 //conformance test
279 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize);
280 assert_false ( (snr < SNR_THRESHOLD));
281
282 NE10_FREE (cfg);
283 }
284}
285
286void test_fft_r2c_1d_float32_performance()
287{
288
289 ne10_int32_t i = 0;
290 ne10_int32_t fftSize = 0;
292 ne10_int32_t test_loop = 0;
293
294 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
295 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
296
297 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
298 {
299 fprintf (stdout, "FFT size %d\n", fftSize);
300
301 /* FFT test */
302 memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
303 memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
304 cfg = ne10_fft_alloc_r2c_float32 (fftSize);
305 if (cfg == NULL)
306 {
307 fprintf (stdout, "======ERROR, FFT alloc fails\n");
308 return;
309 }
310 test_loop = TEST_COUNT / fftSize;
311
312 GET_TIME
313 (
314 time_c,
315 {
316 for (i = 0; i < test_loop; i++)
318 }
319 );
320 GET_TIME
321 (
322 time_neon,
323 {
324 for (i = 0; i < test_loop; i++)
325 ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
326 }
327 );
328
329 time_speedup = (ne10_float32_t) time_c / time_neon;
330 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
331 ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
332
333 /* IFFT test */
334 for (i = 1; i < (fftSize / 2); i++)
335 {
336 in_c[2 * i] = testInput_f32[2 * i];
337 in_c[2 * i + 1] = testInput_f32[2 * i + 1];
338 in_c[2 * (fftSize - i)] = in_c[2 * i];
339 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
340 }
341 in_c[0] = testInput_f32[0];
342 in_c[1] = 0;
343 in_c[fftSize] = testInput_f32[1];
344 in_c[fftSize + 1] = 0;
345 memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
346
347 GET_TIME
348 (
349 time_c,
350 {
351 for (i = 0; i < test_loop; i++)
353 }
354 );
355 GET_TIME
356 (
357 time_neon,
358 {
359 for (i = 0; i < test_loop; i++)
360 ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
361 }
362 );
363
364 time_speedup = (ne10_float32_t) time_c / time_neon;
365 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
366 ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
367
368 NE10_FREE (cfg);
369 }
370}
371
372static void my_test_setup (void)
373{
374 ne10_log_buffer_ptr = ne10_log_buffer;
375 ne10_int32_t i;
376
377 /* init input memory */
378 guarded_in_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
379 guarded_in_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
380 in_c = guarded_in_c + ARRAY_GUARD_LEN;
381 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
382
383 /* init dst memory */
384 guarded_out_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
385 guarded_out_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
386 out_c = guarded_out_c + ARRAY_GUARD_LEN;
387 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
388
389 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
390 {
391 testInput_f32[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
392 }
393}
394
395static void my_test_teardown (void)
396{
397 NE10_FREE (guarded_in_c);
398 NE10_FREE (guarded_in_neon);
399 NE10_FREE (guarded_out_c);
400 NE10_FREE (guarded_out_neon);
401}
402
/**
 * Runs the complex-to-complex float32 FFT tests selected at build time:
 * the conformance test when SMOKE_TEST or REGRESSION_TEST is defined, the
 * performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_c2c_1d_float32()
{
    /* Both macros are tested with defined so that an empty definition of
     * REGRESSION_TEST is accepted as well. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_float32_performance();
#endif
}
413
/**
 * Runs the real-to-complex float32 FFT tests selected at build time:
 * the conformance test when SMOKE_TEST or REGRESSION_TEST is defined, the
 * performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_r2c_1d_float32()
{
    /* Both macros are tested with defined so that an empty definition of
     * REGRESSION_TEST is accepted as well. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_float32_performance();
#endif
}
424
425void test_fixture_fft_c2c_1d_float32 (void)
426{
427 test_fixture_start(); // starts a fixture
428
429 fixture_setup (my_test_setup);
430
431 run_test (test_fft_c2c_1d_float32); // run tests
432
433 fixture_teardown(my_test_teardown);
434
435 test_fixture_end(); // ends a fixture
436}
437
438void test_fixture_fft_r2c_1d_float32 (void)
439{
440 test_fixture_start(); // starts a fixture
441
442 fixture_setup (my_test_setup);
443
444 run_test (test_fft_r2c_1d_float32); // run tests
445
446 fixture_teardown(my_test_teardown);
447
448 test_fixture_end(); // ends a fixture
449}
450
451ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize)
452{
453 NE10_FREE (cfg_c);
454 NE10_FREE (cfg_neon);
455
456 cfg_c = ne10_fft_alloc_c2c_float32_c (fftSize);
457 if (cfg_c == NULL)
458 {
459 fprintf (stdout, "======ERROR, FFT alloc fails\n");
460 return NE10_ERR;
461 }
462
463 cfg_neon = ne10_fft_alloc_c2c_float32_neon (fftSize);
464 if (cfg_neon == NULL)
465 {
466 NE10_FREE (cfg_c);
467 fprintf (stdout, "======ERROR, FFT alloc fails\n");
468 return NE10_ERR;
469 }
470 return NE10_OK;
471}
void ne10_fft_c2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition NE10_fft.c:337
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
void ne10_fft_r2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
void ne10_fft_c2r_1d_float32_neon(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
structure for the floating point FFT state
Definition NE10_types.h:241