Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_fir_sparse.c
1/*
2 * Copyright 2012-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test_suite_fir_sparse.c
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35
36#include "NE10_dsp.h"
37#include "seatest.h"
38
39
40/* ----------------------------------------------------------------------
41** Global defines
42** ------------------------------------------------------------------- */
43
44/* Size limits used to dimension the sparse FIR test buffers */
45#define TEST_LENGTH_SAMPLES 320
46#define MAX_BLOCKSIZE 320
47#define MAX_NUMTAPS 100
48#define MAX_DELAY 500
49
50#define TEST_COUNT 5000
51
52//input and output
53static ne10_float32_t * guarded_in_c = NULL;
54static ne10_float32_t * guarded_in_neon = NULL;
55static ne10_float32_t * in_c = NULL;
56static ne10_float32_t * in_neon = NULL;
57
58static ne10_float32_t * guarded_out_c = NULL;
59static ne10_float32_t * guarded_out_neon = NULL;
60static ne10_float32_t * out_c = NULL;
61static ne10_float32_t * out_neon = NULL;
62
63static ne10_float32_t * guarded_fir_state_c = NULL;
64static ne10_float32_t * guarded_fir_state_neon = NULL;
65static ne10_float32_t * fir_state_c = NULL;
66static ne10_float32_t * fir_state_neon = NULL;
67
68static ne10_float32_t scratch_c[MAX_BLOCKSIZE] = {0};
69static ne10_float32_t scratch_neon[MAX_BLOCKSIZE] = {0};
70
71#if defined (SMOKE_TEST)||(REGRESSION_TEST)
72static ne10_float32_t snr = 0.0f;
73#endif
74#ifdef PERFORMANCE_TEST
75static ne10_int64_t time_c = 0;
76static ne10_int64_t time_neon = 0;
77static ne10_float32_t time_speedup = 0.0f;
78static ne10_float32_t time_savings = 0.0f;
79#endif
80
81/* ----------------------------------------------------------------------
82** Coefficients for 5-tap filter for F32
83** ------------------------------------------------------------------- */
84
85static ne10_float32_t testCoeffs5_f32[5] =
86{
87 1.749140, 0.132598, 0.325228, -0.793809, 0.314924
88};
89
90/* ----------------------------------------------------------------------
91** Coefficients for 32-tap filter for F32
92** ------------------------------------------------------------------- */
93// static ne10_float32_t testCoeffs32_f32[32] =
94// {
95// 1.749140, 0.132598, 0.325228, -0.793809, 0.314924, -0.527270, 0.932267, 1.164664,
96// -2.045669, -0.644373, 1.741066, 0.486768, 1.048829, 1.488575, 1.270501, -1.856124,
97// 2.134321, 1.435847, -0.917302, -1.106077, 0.810571, 0.698543, -0.401583, 1.268751,
98// -0.783608, 0.213266, 0.787898, 0.896682, -0.186917, 1.013182, 0.248435, 0.059608
99// };
100
101/* ----------------------------------------------------------------------
102** Delay offsets for 5-tap Sparse filter for F32
103** ------------------------------------------------------------------- */
104static ne10_int32_t tapDelay5_f32[5] =
105{
106 95, 23, 61, 49, 89
107};
108
109/* ----------------------------------------------------------------------
110** Delay offsets for 32-tap Sparse filter for F32
111** ------------------------------------------------------------------- */
112// static ne10_int32_t tapDelay32_f32[32] =
113// {
114// 95, 23, 61, 49, 89, 76, 46, 2,
115// 82, 44, 62, 79, 92, 74, 18, 41,
116// 94, 92, 41, 89, 6, 35, 81, 1,
117// 14, 20, 20, 60, 27, 20, 2, 75
118// };
119
120/* ----------------------------------------------------------------------
121** Test input data for F32
122** Generated by the MATLAB rand() function
123** ------------------------------------------------------------------- */
124static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
125{
126 -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
127 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
128 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
129 -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
130 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
131 -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
132 -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
133 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
134 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
135 -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
136 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
137 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
138 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
139 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
140 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
141 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
142 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
143 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
144 -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
145 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
146 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
147 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
148 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
149 -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
150 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
151 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
152 -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
153 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
154 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
155 -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
156 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
157 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
158 -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
159 -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
160 -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
161 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
162 -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
163 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
164 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
165 -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934
166};
167
168/* ----------------------------------------------------------------------
169** Defines each of the tests performed
170** ------------------------------------------------------------------- */
171typedef struct
172{
173 ne10_uint32_t blockSize;
174 ne10_uint32_t numTaps;
175 ne10_uint32_t numFrames;
176 ne10_uint32_t maxDelay;
177 ne10_int32_t *tapDelay;
178 ne10_float32_t *coeffsF32;
179 ne10_float32_t *inputF32;
181
/*
 * Conformance-test configurations (smoke / regression builds).
 * Columns follow test_config field order:
 *   blockSize, numTaps, numFrames, maxDelay, tapDelay, coeffsF32, inputF32
 */
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
static test_config CONFIG[] =
{
    { 0, 5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 2, 5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    /* disabled: { 2, 0, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32}, */
    {64, 5,   5, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 5, 5,  64, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    /* disabled: {64, 32, 5, 100, tapDelay32_f32, testCoeffs32_f32, testInput_f32} */
};
/* Number of entries in CONFIG. */
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#endif
/*
 * Timing-test configurations (performance builds).
 * Columns follow test_config field order:
 *   blockSize, numTaps, numFrames, maxDelay, tapDelay, coeffsF32, inputF32
 */
#ifdef PERFORMANCE_TEST
static test_config CONFIG_PERF[] =
{
    { 2, 5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    {64, 5,   5, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 5, 5,  64, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
};
/* Number of entries in CONFIG_PERF. */
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
#endif
204
205
206void test_fir_sparse_case0()
207{
209
210 ne10_uint16_t loop = 0;
211 ne10_uint16_t block = 0;
212 ne10_uint16_t i = 0;
213
214 test_config *config;
215 ne10_result_t status_c = NE10_OK;
216 ne10_result_t status_neon = NE10_OK;
217
218 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
219
220 /* init input memory */
221 NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
222 NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
223
224 /* init dst memory */
225 NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY + TEST_LENGTH_SAMPLES);
226 NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY + TEST_LENGTH_SAMPLES);
227
228 /* init state memory */
229 NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY + MAX_BLOCKSIZE);
230 NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY + MAX_BLOCKSIZE);
231
232#ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
233#if defined (SMOKE_TEST)||(REGRESSION_TEST)
234 ne10_uint16_t pos = 0;
235 for (loop = 0; loop < NUM_TESTS; loop++)
236 {
237 config = &CONFIG[loop];
238
239 /* Initialize the CFFT/CIFFT module */
240 status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
241 status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
242
243 if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
244 {
245 fprintf (stdout, "initialization error\n");
246 }
247
248 /* copy input to input buffer */
249 for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
250 {
251 in_c[i] = testInput_f32[i];
252 in_neon[i] = testInput_f32[i];
253 scratch_c[i] = 0;
254 scratch_neon[i] = 0;
255 }
256
257 GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
258 GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
259
260 for (block = 0; block < config->numFrames; block++)
261 {
262 ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
263 }
264 for (block = 0; block < config->numFrames; block++)
265 {
266 ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
267 }
268
269 CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
270 CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
271
272 //conformance test 1: compare snr
273 snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
274 assert_false ( (snr < SNR_THRESHOLD));
275
276 //conformance test 2: compare output of C and neon
277#if defined (DEBUG_TRACE)
278 printf ("--------------------config %d\n", loop);
279 printf ("snr %f\n", snr);
280#endif
281 for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
282 {
283#if defined (DEBUG_TRACE)
284 printf ("pos %d \n", pos);
285 printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
286#endif
287 assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
288 }
289
290 }
291#endif
292#endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
293
294#ifdef PERFORMANCE_TEST
295 ne10_uint16_t k;
296 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
297 for (loop = 0; loop < NUM_PERF_TESTS; loop++)
298 {
299 config = &CONFIG_PERF[loop];
300
301 /* Initialize the CFFT/CIFFT module */
302 status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
303 status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
304
305 if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
306 {
307 fprintf (stdout, "initialization error\n");
308 }
309
310 /* copy input to input buffer */
311 for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
312 {
313 in_c[i] = testInput_f32[i];
314 in_neon[i] = testInput_f32[i];
315 }
316
317 GET_TIME
318 (
319 time_c,
320 {
321 for (k = 0; k < TEST_COUNT; k++)
322 {
323 for (block = 0; block < config->numFrames; block++)
324 {
325 ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
326 }
327 }
328 }
329 );
330
331#ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
332 GET_TIME
333 (
334 time_neon,
335 {
336 for (k = 0; k < TEST_COUNT; k++)
337 {
338 for (block = 0; block < config->numFrames; block++)
339 {
340 ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
341 }
342 }
343 }
344 );
345#endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
346
347 time_speedup = (ne10_float32_t) time_c / time_neon;
348 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
349 ne10_log (__FUNCTION__, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->numTaps, time_c, time_neon, time_savings, time_speedup);
350 }
351#endif
352
353 free (guarded_in_c);
354 free (guarded_in_neon);
355 free (guarded_out_c);
356 free (guarded_out_neon);
357 free (guarded_fir_state_c);
358 free (guarded_fir_state_neon);
359 fprintf (stdout, "----------%30s end\n", __FUNCTION__);
360}
361
/* Entry point for the sparse FIR tests; currently runs the single case 0. */
void test_fir_sparse()
{
    test_fir_sparse_case0();
}
366
367static void my_test_setup (void)
368{
369 ne10_log_buffer_ptr = ne10_log_buffer;
370}
371
372void test_fixture_fir_sparse (void)
373{
374 test_fixture_start(); // starts a fixture
375
376 fixture_setup (my_test_setup);
377
378 run_test (test_fir_sparse); // run tests
379
380 test_fixture_end(); // ends a fixture
381}
void ne10_fir_sparse_float_c(ne10_fir_sparse_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_float32_t *pScratchIn, ne10_uint32_t blockSize)
Processing function for the floating-point sparse FIR filter.
Definition NE10_fir.c:1439
Instance structure for the floating-point FIR Sparse filter.
Definition NE10_types.h:407