39 #include "unit_test_common.h" 47 #define TEST_LENGTH_SAMPLES (4096) 48 #define MIN_LENGTH_SAMPLES_CPX (4) 49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2) 51 #define SNR_THRESHOLD_INT16 15.0f 53 #define TEST_COUNT 250000 60 static ne10_int32_t testInput_i16_unscaled[TEST_LENGTH_SAMPLES * 2];
61 static ne10_int32_t testInput_i16_scaled[TEST_LENGTH_SAMPLES * 2];
62 static ne10_int16_t * guarded_in_c = NULL;
63 static ne10_int16_t * guarded_in_neon = NULL;
64 static ne10_int16_t * in_c = NULL;
65 static ne10_int16_t * in_neon = NULL;
67 static ne10_int16_t * guarded_out_c = NULL;
68 static ne10_int16_t * guarded_out_neon = NULL;
69 static ne10_int16_t * out_c = NULL;
70 static ne10_int16_t * out_neon = NULL;
72 static ne10_float32_t snr = 0.0f;
74 static ne10_int64_t time_c = 0;
75 static ne10_int64_t time_neon = 0;
76 static ne10_float32_t time_speedup = 0.0f;
77 static ne10_float32_t time_savings = 0.0f;
/*
 * Conformance test for the complex-to-complex int16 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_CPX up to TEST_LENGTH_SAMPLES
 * (doubling each step) four rounds are run: two with the unscaled test vector
 * and two with the scaled one. Each round loads both input buffers, arms the
 * guard regions around both output buffers, verifies the guards afterwards,
 * converts both outputs to float and asserts that their SNR is not below
 * SNR_THRESHOLD_INT16.
 *
 * NOTE(review): the transform calls themselves are not visible in this
 * listing; the second round of each pair is presumably the inverse
 * transform -- TODO confirm against the full file.
 */
79 void test_fft_c2c_1d_int16_conformance()
83 ne10_int32_t fftSize = 0;
/* Float copies of the int16 outputs, passed to CAL_SNR_FLOAT32. */
85 ne10_float32_t * out_c_tmp = NULL;
86 ne10_float32_t * out_neon_tmp = NULL;
88 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Input buffers: TEST_LENGTH_SAMPLES * 2 values plus ARRAY_GUARD_LEN * 2 extra
 * elements; the usable window starts ARRAY_GUARD_LEN elements in. */
91 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
92 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
93 in_c = guarded_in_c + ARRAY_GUARD_LEN;
94 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, laid out the same way as the inputs. */
97 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
98 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
99 out_c = guarded_out_c + ARRAY_GUARD_LEN;
100 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
102 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
103 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
/* Fill both test vectors with pseudo-random values from drand48(). */
105 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
107 testInput_i16_unscaled[i] = (ne10_int32_t) (drand48() * 1024) - 512;
108 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
110 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
112 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Failure message for the FFT configuration allocation (the allocation call
 * and its check are not visible in this listing). */
116 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Round 1: unscaled test vector.
 * NOTE(review): memcpy copies raw bytes sized in ne10_int16_t units; this is
 * an element-wise copy only if the source array's element type is also
 * ne10_int16_t -- confirm against its declaration. */
121 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
122 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
/* Arm the guards around fftSize * 2 output values, then verify them. */
124 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
125 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
130 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
131 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
/* NOTE(review): converts all TEST_LENGTH_SAMPLES * 2 entries although the SNR
 * below only looks at the first fftSize * 2. */
134 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
136 out_c_tmp[i] = (ne10_float32_t) out_c[i];
137 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
139 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
140 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 2: unscaled test vector again, same guard / convert / SNR sequence. */
143 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
144 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
146 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
147 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
152 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
153 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
156 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
158 out_c_tmp[i] = (ne10_float32_t) out_c[i];
159 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
161 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
162 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 3: scaled test vector. */
165 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
166 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
168 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
169 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
174 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
175 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
178 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
180 out_c_tmp[i] = (ne10_float32_t) out_c[i];
181 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
183 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
184 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 4: scaled test vector again. */
187 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
188 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
190 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
191 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
196 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int16_t));
197 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int16_t));
200 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
202 out_c_tmp[i] = (ne10_float32_t) out_c[i];
203 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
205 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
206 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Release the raw allocations (not the offset in_* / out_* windows). */
211 NE10_FREE (guarded_in_c);
212 NE10_FREE (guarded_in_neon);
213 NE10_FREE (guarded_out_c);
214 NE10_FREE (guarded_out_neon);
215 NE10_FREE (out_c_tmp);
216 NE10_FREE (out_neon_tmp);
/*
 * Performance test for the complex-to-complex int16 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_CPX up to TEST_LENGTH_SAMPLES
 * (doubling each step) four C-vs-NEON timing comparisons are logged: two
 * labelled "unscaled" and two labelled "scaled". Each comparison consists of
 * two loops of test_loop iterations, after which time_speedup and
 * time_savings are derived from time_c and time_neon.
 *
 * NOTE(review): the timed calls and the code that sets time_c / time_neon are
 * not visible in this listing.
 */
219 void test_fft_c2c_1d_int16_performance()
223 ne10_int32_t fftSize = 0;
225 ne10_int32_t test_loop = 0;
227 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headings for the result table. */
228 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Input buffers; the usable window starts ARRAY_GUARD_LEN elements in. */
231 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
232 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
233 in_c = guarded_in_c + ARRAY_GUARD_LEN;
234 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, laid out the same way. No guard macros are applied to them
 * in the visible part of this function. */
237 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
238 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
239 out_c = guarded_out_c + ARRAY_GUARD_LEN;
240 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Fill both test vectors with pseudo-random values from drand48(). */
242 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
244 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
245 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
247 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
249 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Failure message for the FFT configuration allocation (the allocation call
 * and its check are not visible in this listing). */
253 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Fewer iterations for larger transforms: TEST_COUNT / fftSize. */
256 test_loop = TEST_COUNT / fftSize;
/* Comparison 1: unscaled test vector as input.
 * NOTE(review): memcpy copies raw bytes sized in ne10_int16_t units; this is
 * an element-wise copy only if the source array's element type is also
 * ne10_int16_t -- confirm against its declaration. */
259 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
260 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (ne10_int16_t));
/* Two timing loops (bodies not visible; presumably C then NEON). */
266 for (i = 0; i < test_loop; i++)
274 for (i = 0; i < test_loop; i++)
/* NOTE(review): both ratios divide by a measured time; a zero reading would
 * produce a non-finite value -- confirm the timer resolution makes this
 * impossible. */
279 time_speedup = (ne10_float32_t) time_c / time_neon;
280 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
281 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 2: the C output of the previous comparison becomes the input of
 * both implementations. */
284 memcpy (in_c, out_c, 2 * fftSize *
sizeof (ne10_int16_t));
285 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (ne10_int16_t));
291 for (i = 0; i < test_loop; i++)
299 for (i = 0; i < test_loop; i++)
304 time_speedup = (ne10_float32_t) time_c / time_neon;
305 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
306 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 3: scaled test vector as input. */
308 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
309 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (ne10_int16_t));
315 for (i = 0; i < test_loop; i++)
323 for (i = 0; i < test_loop; i++)
328 time_speedup = (ne10_float32_t) time_c / time_neon;
329 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
330 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 4: the C output of the previous comparison becomes the input. */
333 memcpy (in_c, out_c, 2 * fftSize *
sizeof (ne10_int16_t));
334 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (ne10_int16_t));
340 for (i = 0; i < test_loop; i++)
348 for (i = 0; i < test_loop; i++)
353 time_speedup = (ne10_float32_t) time_c / time_neon;
354 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
355 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Release the raw allocations (not the offset in_* / out_* windows). */
360 NE10_FREE (guarded_in_c);
361 NE10_FREE (guarded_in_neon);
362 NE10_FREE (guarded_out_c);
363 NE10_FREE (guarded_out_neon);
/*
 * Conformance test for the real-to-complex / complex-to-real int16 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_REAL up to TEST_LENGTH_SAMPLES
 * (doubling each step) four rounds are run, first with the unscaled and then
 * with the scaled test vector:
 *   - a round whose input is fftSize values and whose checked output is
 *     (fftSize / 2 + 1) * 2 int16 values;
 *   - a round whose input is built element by element with mirrored bins and
 *     whose checked output is fftSize int16 values.
 * After each round the guard regions are verified and the C and NEON outputs
 * are compared via CAL_SNR_FLOAT32 against SNR_THRESHOLD_INT16.
 *
 * NOTE(review): the transform calls are not visible in this listing; the two
 * round shapes presumably correspond to r2c and c2r -- TODO confirm.
 */
366 void test_fft_r2c_1d_int16_conformance()
370 ne10_int32_t fftSize = 0;
/* Float copies of the int16 outputs, passed to CAL_SNR_FLOAT32. */
372 ne10_float32_t * out_c_tmp = NULL;
373 ne10_float32_t * out_neon_tmp = NULL;
375 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Input buffers; the usable window starts ARRAY_GUARD_LEN elements in. */
378 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
379 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
380 in_c = guarded_in_c + ARRAY_GUARD_LEN;
381 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, laid out the same way as the inputs. */
384 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
385 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
386 out_c = guarded_out_c + ARRAY_GUARD_LEN;
387 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
389 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
390 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
/* Fill both test vectors with pseudo-random values from drand48(). */
392 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
394 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
395 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
397 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
399 fprintf (stdout,
"RFFT size %d\n", fftSize);
/* Failure message for the FFT configuration allocation (the allocation call
 * and its check are not visible in this listing). */
403 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Round 1 (unscaled): fftSize input values.
 * NOTE(review): memcpy copies raw bytes sized in ne10_int16_t units; this is
 * an element-wise copy only if the source array's element type is also
 * ne10_int16_t -- confirm against its declaration. */
408 memcpy (in_c, testInput_i16_unscaled, fftSize *
sizeof (ne10_int16_t));
409 memcpy (in_neon, testInput_i16_unscaled, fftSize *
sizeof (ne10_int16_t));
/* Checked output: (fftSize / 2 + 1) pairs of int16 values. */
411 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
412 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
417 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
418 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
421 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
423 out_c_tmp[i] = (ne10_float32_t) out_c[i];
424 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
426 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
427 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 2 (unscaled): build the input pair by pair. Pair i is taken from the
 * test vector and mirrored to pair (fftSize - i) with its second component
 * negated. */
430 for (i = 1; i < (fftSize / 2); i++)
432 in_c[2 * i] = testInput_i16_unscaled[2 * i];
433 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
434 in_c[2 * (fftSize - i)] = in_c[2 * i];
435 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; in_c[1] is not written
 * here and keeps whatever the previous round left in it. */
437 in_c[0] = testInput_i16_unscaled[0];
439 in_c[fftSize] = testInput_i16_unscaled[1];
440 in_c[fftSize + 1] = 0;
441 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int16_t));
/* Checked output: fftSize int16 values. */
443 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int16_t));
444 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int16_t));
449 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int16_t));
450 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int16_t));
453 for (i = 0; i < fftSize; i++)
455 out_c_tmp[i] = (ne10_float32_t) out_c[i];
456 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
458 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
459 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 3 (scaled): same shape as round 1, using the scaled test vector. */
462 memcpy (in_c, testInput_i16_scaled, fftSize *
sizeof (ne10_int16_t));
463 memcpy (in_neon, testInput_i16_scaled, fftSize *
sizeof (ne10_int16_t));
465 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
466 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
471 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
472 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int16_t));
475 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
477 out_c_tmp[i] = (ne10_float32_t) out_c[i];
478 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
480 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
481 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Round 4 (scaled): same shape as round 2, using the scaled test vector. */
484 for (i = 1; i < (fftSize / 2); i++)
486 in_c[2 * i] = testInput_i16_scaled[2 * i];
487 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
488 in_c[2 * (fftSize - i)] = in_c[2 * i];
489 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
491 in_c[0] = testInput_i16_scaled[0];
493 in_c[fftSize] = testInput_i16_scaled[1];
494 in_c[fftSize + 1] = 0;
495 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int16_t));
497 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int16_t));
498 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int16_t));
503 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int16_t));
504 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int16_t));
507 for (i = 0; i < fftSize; i++)
509 out_c_tmp[i] = (ne10_float32_t) out_c[i];
510 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
512 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
513 assert_false ( (snr < SNR_THRESHOLD_INT16));
/* Release the raw allocations (not the offset in_* / out_* windows). */
518 NE10_FREE (guarded_in_c);
519 NE10_FREE (guarded_in_neon);
520 NE10_FREE (guarded_out_c);
521 NE10_FREE (guarded_out_neon);
522 NE10_FREE (out_c_tmp);
523 NE10_FREE (out_neon_tmp);
/*
 * Performance test for the real-to-complex / complex-to-real int16 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_REAL up to TEST_LENGTH_SAMPLES
 * (doubling each step) four C-vs-NEON timing comparisons are logged: two
 * labelled "unscaled" and two labelled "scaled". In each pair the first
 * comparison takes fftSize values copied from the test vector, the second
 * takes an input built pair by pair with mirrored bins.
 *
 * NOTE(review): the timed calls and the code that sets time_c / time_neon are
 * not visible in this listing.
 */
526 void test_fft_r2c_1d_int16_performance()
530 ne10_int32_t fftSize = 0;
532 ne10_int32_t test_loop = 0;
534 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headings for the result table. */
535 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Input buffers; the usable window starts ARRAY_GUARD_LEN elements in. */
538 guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
539 guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
540 in_c = guarded_in_c + ARRAY_GUARD_LEN;
541 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, laid out the same way. */
544 guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
545 guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int16_t));
546 out_c = guarded_out_c + ARRAY_GUARD_LEN;
547 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Fill both test vectors with pseudo-random values from drand48(). */
549 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
551 testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
552 testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
554 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
556 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Failure message for the FFT configuration allocation (the allocation call
 * and its check are not visible in this listing). */
560 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Fewer iterations for larger transforms: TEST_COUNT / fftSize. */
563 test_loop = TEST_COUNT / fftSize;
/* Comparison 1 (unscaled): fftSize input values.
 * NOTE(review): memcpy copies raw bytes sized in ne10_int16_t units; this is
 * an element-wise copy only if the source array's element type is also
 * ne10_int16_t -- confirm against its declaration. */
566 memcpy (in_c, testInput_i16_unscaled , fftSize *
sizeof (ne10_int16_t));
567 memcpy (in_neon, testInput_i16_unscaled , fftSize *
sizeof (ne10_int16_t));
/* Two timing loops (bodies not visible; presumably C then NEON). */
573 for (i = 0; i < test_loop; i++)
581 for (i = 0; i < test_loop; i++)
/* NOTE(review): both ratios divide by a measured time; a zero reading would
 * produce a non-finite value -- confirm the timer resolution makes this
 * impossible. */
586 time_speedup = (ne10_float32_t) time_c / time_neon;
587 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
588 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 2 (unscaled): pair i comes from the test vector and is mirrored
 * to pair (fftSize - i) with its second component negated. This loop reuses
 * i, the same variable as the timing loops. */
591 for (i = 1; i < (fftSize / 2); i++)
593 in_c[2 * i] = testInput_i16_unscaled[2 * i];
594 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
595 in_c[2 * (fftSize - i)] = in_c[2 * i];
596 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; in_c[1] is not written
 * here. */
598 in_c[0] = testInput_i16_unscaled[0];
600 in_c[fftSize] = testInput_i16_unscaled[1];
601 in_c[fftSize + 1] = 0;
602 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int16_t));
608 for (i = 0; i < test_loop; i++)
616 for (i = 0; i < test_loop; i++)
621 time_speedup = (ne10_float32_t) time_c / time_neon;
622 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
623 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 3 (scaled): same shape as comparison 1. */
626 memcpy (in_c, testInput_i16_scaled , fftSize *
sizeof (ne10_int16_t));
627 memcpy (in_neon, testInput_i16_scaled , fftSize *
sizeof (ne10_int16_t));
633 for (i = 0; i < test_loop; i++)
641 for (i = 0; i < test_loop; i++)
646 time_speedup = (ne10_float32_t) time_c / time_neon;
647 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
648 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Comparison 4 (scaled): same shape as comparison 2. */
651 for (i = 1; i < (fftSize / 2); i++)
653 in_c[2 * i] = testInput_i16_scaled[2 * i];
654 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
655 in_c[2 * (fftSize - i)] = in_c[2 * i];
656 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
658 in_c[0] = testInput_i16_scaled[0];
660 in_c[fftSize] = testInput_i16_scaled[1];
661 in_c[fftSize + 1] = 0;
662 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int16_t));
668 for (i = 0; i < test_loop; i++)
676 for (i = 0; i < test_loop; i++)
681 time_speedup = (ne10_float32_t) time_c / time_neon;
682 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
683 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Release the raw allocations (not the offset in_* / out_* windows). */
688 NE10_FREE (guarded_in_c);
689 NE10_FREE (guarded_in_neon);
690 NE10_FREE (guarded_out_c);
691 NE10_FREE (guarded_out_neon);
/*
 * Entry point for the complex-to-complex int16 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both selectors are
 * tested with defined(), so the build flags work whether or not they carry a
 * value (an empty or zero-valued REGRESSION_TEST previously broke or
 * disabled the first condition).
 */
void test_fft_c2c_1d_int16()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int16_performance();
#endif
}
/*
 * Entry point for the real-to-complex int16 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both selectors are
 * tested with defined(), so the build flags work whether or not they carry a
 * value (an empty or zero-valued REGRESSION_TEST previously broke or
 * disabled the first condition).
 */
void test_fft_r2c_1d_int16()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int16_performance();
#endif
}
/* Per-test setup hook: points ne10_log_buffer_ptr back at the start of
 * ne10_log_buffer. Registered through fixture_setup() by the fixtures in this
 * file. */
716 static void my_test_setup (
void)
718 ne10_log_buffer_ptr = ne10_log_buffer;
/* Test fixture for the complex-to-complex int16 FFT: starts the fixture,
 * registers my_test_setup as the setup hook and runs test_fft_c2c_1d_int16.
 * NOTE(review): the end of this function is not visible in this listing. */
721 void test_fixture_fft_c2c_1d_int16 (
void)
723 test_fixture_start();
725 fixture_setup (my_test_setup);
727 run_test (test_fft_c2c_1d_int16);
/* Test fixture for the real-to-complex int16 FFT: starts the fixture,
 * registers my_test_setup as the setup hook and runs test_fft_r2c_1d_int16.
 * NOTE(review): the end of this function is not visible in this listing. */
732 void test_fixture_fft_r2c_1d_int16 (
void)
734 test_fixture_start();
736 fixture_setup (my_test_setup);
738 run_test (test_fft_r2c_1d_int16);
Structure for the 16-bit fixed-point FFT function.
void ne10_fft_c2r_1d_int16_neon(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
void ne10_fft_r2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_c2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.