22#include "../SDL_internal.h"
29#define HAVE_NEON_INTRINSICS 1
33#define HAVE_SSE2_INTRINSICS 1
36#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
37#define NEED_SCALAR_CONVERTER_FALLBACKS 0
38#elif __MACOSX__ && HAVE_SSE2_INTRINSICS
39#define NEED_SCALAR_CONVERTER_FALLBACKS 0
40#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
41#define NEED_SCALAR_CONVERTER_FALLBACKS 0
42#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
43#define NEED_SCALAR_CONVERTER_FALLBACKS 0
47#ifndef NEED_SCALAR_CONVERTER_FALLBACKS
48#define NEED_SCALAR_CONVERTER_FALLBACKS 1
64#define DIVBY128 0.0078125f  /* 1.0f / 128.0f: multiplied into 8-bit samples after they are converted to float */
65#define DIVBY32768 0.000030517578125f  /* 1.0f / 32768.0f: multiplied into 16-bit samples after they are converted to float */
66#define DIVBY8388607 0.00000011920930376163766f  /* 1.0f / 8388607.0f: multiplied into 32-bit samples after they are shifted right by 8 and converted to float */
69#if NEED_SCALAR_CONVERTER_FALLBACKS
150 float *
dst = (
float *) cvt->
buf;
167 const float *
src = (
const float *) cvt->
buf;
174 const float sample = *
src;
175 if (sample >= 1.0f) {
177 }
else if (sample <= -1.0f) {
193 const float *
src = (
const float *) cvt->
buf;
200 const float sample = *
src;
201 if (sample >= 1.0f) {
203 }
else if (sample <= -1.0f) {
206 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
219 const float *
src = (
const float *) cvt->
buf;
226 const float sample = *
src;
227 if (sample >= 1.0f) {
229 }
else if (sample <= -1.0f) {
245 const float *
src = (
const float *) cvt->
buf;
252 const float sample = *
src;
253 if (sample >= 1.0f) {
255 }
else if (sample <= -1.0f) {
258 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
271 const float *
src = (
const float *) cvt->
buf;
278 const float sample = *
src;
279 if (sample >= 1.0f) {
281 }
else if (sample <= -1.0f) {
284 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
295#if HAVE_SSE2_INTRINSICS
314 if ((((
size_t)
src) & 15) == 0) {
316 const __m128i *mmsrc = (
const __m128i *)
src;
317 const __m128i
zero = _mm_setzero_si128();
318 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
320 const __m128i bytes = _mm_load_si128(mmsrc);
322 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
324 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
326 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1,
zero), 16), 16)), divby128);
327 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2,
zero), 16), 16)), divby128);
328 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1,
zero), 16), 16)), divby128);
329 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2,
zero), 16), 16)), divby128);
331 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
332 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
333 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
334 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
335 i -= 16; mmsrc--;
dst -= 16;
373 if ((((
size_t)
src) & 15) == 0) {
375 const __m128i *mmsrc = (
const __m128i *)
src;
376 const __m128i
zero = _mm_setzero_si128();
377 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
378 const __m128 minus1 = _mm_set1_ps(-1.0f);
380 const __m128i bytes = _mm_load_si128(mmsrc);
382 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
384 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
387 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1,
zero)), divby128), minus1);
388 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2,
zero)), divby128), minus1);
389 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1,
zero)), divby128), minus1);
390 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2,
zero)), divby128), minus1);
392 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
393 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
394 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
395 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
396 i -= 16; mmsrc--;
dst -= 16;
434 if ((((
size_t)
src) & 15) == 0) {
436 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
438 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
440 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
442 const __m128i
b = _mm_srai_epi32(ints, 16);
444 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768));
445 _mm_store_ps(
dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768));
482 if ((((
size_t)
src) & 15) == 0) {
484 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
485 const __m128 minus1 = _mm_set1_ps(-1.0f);  /* bias added after scaling: (u16 * DIVBY32768) + (-1.0f) maps 0..65535 onto [-1.0f, 1.0f); was +1.0f, which shifted every sample into [1.0f, 3.0f) */
487 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
489 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
491 const __m128i
b = _mm_srli_epi32(ints, 16);
493 _mm_store_ps(
dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768), minus1));
494 _mm_store_ps(
dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768), minus1));
517 float *
dst = (
float *) cvt->
buf;
530 if ((((
size_t)
src) & 15) == 0) {
533 const __m128i *mmsrc = (
const __m128i *)
src;
536 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
537 i -= 4; mmsrc++;
dst += 4;
556 const float *
src = (
const float *) cvt->
buf;
564 const float sample = *
src;
565 if (sample >= 1.0f) {
567 }
else if (sample <= -1.0f) {
577 if ((((
size_t)
src) & 15) == 0) {
579 const __m128
one = _mm_set1_ps(1.0f);
580 const __m128 negone = _mm_set1_ps(-1.0f);
581 const __m128 mulby127 = _mm_set1_ps(127.0f);
582 __m128i *mmdst = (__m128i *)
dst;
584 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby127));
585 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby127));
586 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one), mulby127));
587 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one), mulby127));
588 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
589 i -= 16;
src += 16; mmdst++;
596 const float sample = *
src;
597 if (sample >= 1.0f) {
599 }
else if (sample <= -1.0f) {
616 const float *
src = (
const float *) cvt->
buf;
624 const float sample = *
src;
625 if (sample >= 1.0f) {
627 }
else if (sample <= -1.0f) {
630 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
637 if ((((
size_t)
src) & 15) == 0) {
639 const __m128
one = _mm_set1_ps(1.0f);
640 const __m128 negone = _mm_set1_ps(-1.0f);
641 const __m128 mulby127 = _mm_set1_ps(127.0f);
642 __m128i *mmdst = (__m128i *)
dst;
644 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one),
one), mulby127));
645 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one),
one), mulby127));
646 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one),
one), mulby127));
647 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one),
one), mulby127));
648 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
649 i -= 16;
src += 16; mmdst++;
656 const float sample = *
src;
657 if (sample >= 1.0f) {
659 }
else if (sample <= -1.0f) {
662 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
676 const float *
src = (
const float *) cvt->
buf;
684 const float sample = *
src;
685 if (sample >= 1.0f) {
687 }
else if (sample <= -1.0f) {
697 if ((((
size_t)
src) & 15) == 0) {
699 const __m128
one = _mm_set1_ps(1.0f);
700 const __m128 negone = _mm_set1_ps(-1.0f);
701 const __m128 mulby32767 = _mm_set1_ps(32767.0f);
702 __m128i *mmdst = (__m128i *)
dst;
704 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
705 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
706 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
707 i -= 8;
src += 8; mmdst++;
714 const float sample = *
src;
715 if (sample >= 1.0f) {
717 }
else if (sample <= -1.0f) {
734 const float *
src = (
const float *) cvt->
buf;
742 const float sample = *
src;
743 if (sample >= 1.0f) {
745 }
else if (sample <= -1.0f) {
748 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
755 if ((((
size_t)
src) & 15) == 0) {
764 const __m128 mulby32767 = _mm_set1_ps(32767.0f);
765 const __m128i topbit = _mm_set1_epi16(-32768);
766 const __m128
one = _mm_set1_ps(1.0f);
767 const __m128 negone = _mm_set1_ps(-1.0f);
768 __m128i *mmdst = (__m128i *)
dst;
770 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
771 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
772 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
773 i -= 8;
src += 8; mmdst++;
780 const float sample = *
src;
781 if (sample >= 1.0f) {
783 }
else if (sample <= -1.0f) {
786 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
800 const float *
src = (
const float *) cvt->
buf;
808 const float sample = *
src;
809 if (sample >= 1.0f) {
811 }
else if (sample <= -1.0f) {
814 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
823 const __m128
one = _mm_set1_ps(1.0f);
824 const __m128 negone = _mm_set1_ps(-1.0f);
825 const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
826 __m128i *mmdst = (__m128i *)
dst;
828 _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby8388607)), 8));
829 i -= 4;
src += 4; mmdst++;
836 const float sample = *
src;
837 if (sample >= 1.0f) {
839 }
else if (sample <= -1.0f) {
842 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
854#if HAVE_NEON_INTRINSICS
873 if ((((
size_t)
src) & 15) == 0) {
876 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
878 const int8x16_t bytes = vld1q_s8(mmsrc);
879 const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));
880 const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));
882 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));  /* loaded samples 0..3 -> dst[0..3]; groups were previously stored in reverse order */
883 vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));  /* loaded samples 4..7 -> dst[4..7] */
884 vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));  /* loaded samples 8..11 -> dst[8..11] */
885 vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));  /* loaded samples 12..15 -> dst[12..15] */
886 i -= 16; mmsrc -= 16;
dst -= 16;
924 if ((((
size_t)
src) & 15) == 0) {
927 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
928 const float32x4_t negone = vdupq_n_f32(-1.0f);
930 const uint8x16_t bytes = vld1q_u8(mmsrc);
931 const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));
932 const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));
934 vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));  /* loaded samples 0..3 -> dst[0..3], computed as negone + value * divby128; groups were previously stored in reverse order */
935 vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));  /* loaded samples 4..7 -> dst[4..7] */
936 vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));  /* loaded samples 8..11 -> dst[8..11] */
937 vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));  /* loaded samples 12..15 -> dst[12..15] */
938 i -= 16; mmsrc -= 16;
dst -= 16;
976 if ((((
size_t)
src) & 15) == 0) {
978 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
980 const int16x8_t ints = vld1q_s16((
int16_t const *)
src);
982 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
983 vst1q_f32(
dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
1020 if ((((
size_t)
src) & 15) == 0) {
1022 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
1023 const float32x4_t negone = vdupq_n_f32(-1.0f);
1025 const uint16x8_t uints = vld1q_u16((
uint16_t const *)
src);
1027 vst1q_f32(
dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1028 vst1q_f32(
dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1051 float *
dst = (
float *) cvt->
buf;
1064 if ((((
size_t)
src) & 15) == 0) {
1066 const float32x4_t divby8388607 = vdupq_n_f32(
DIVBY8388607);
1070 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1071 i -= 4; mmsrc += 4;
dst += 4;
1090 const float *
src = (
const float *) cvt->
buf;
1098 const float sample = *
src;
1099 if (sample >= 1.0f) {
1101 }
else if (sample <= -1.0f) {
1111 if ((((
size_t)
src) & 15) == 0) {
1113 const float32x4_t
one = vdupq_n_f32(1.0f);
1114 const float32x4_t negone = vdupq_n_f32(-1.0f);
1115 const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1118 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby127));
1119 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby127));
1120 const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one), mulby127));
1121 const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one), mulby127));
1122 const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1123 const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4)));
1124 vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));
1125 i -= 16;
src += 16; mmdst += 16;
1132 const float sample = *
src;
1133 if (sample >= 1.0f) {
1135 }
else if (sample <= -1.0f) {
1152 const float *
src = (
const float *) cvt->
buf;
1160 const float sample = *
src;
1161 if (sample >= 1.0f) {
1163 }
else if (sample <= -1.0f) {
1166 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
1173 if ((((
size_t)
src) & 15) == 0) {
1175 const float32x4_t
one = vdupq_n_f32(1.0f);
1176 const float32x4_t negone = vdupq_n_f32(-1.0f);
1177 const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1180 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby127));
1181 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby127));
1182 const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one),
one), mulby127));
1183 const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one),
one), mulby127));
1184 const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1185 const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4)));
1186 vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));
1187 i -= 16;
src += 16; mmdst += 16;
1195 const float sample = *
src;
1196 if (sample >= 1.0f) {
1198 }
else if (sample <= -1.0f) {
1201 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
1215 const float *
src = (
const float *) cvt->
buf;
1223 const float sample = *
src;
1224 if (sample >= 1.0f) {
1226 }
else if (sample <= -1.0f) {
1236 if ((((
size_t)
src) & 15) == 0) {
1238 const float32x4_t
one = vdupq_n_f32(1.0f);
1239 const float32x4_t negone = vdupq_n_f32(-1.0f);
1240 const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1243 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby32767));
1244 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby32767));
1245 vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1246 i -= 8;
src += 8; mmdst += 8;
1253 const float sample = *
src;
1254 if (sample >= 1.0f) {
1256 }
else if (sample <= -1.0f) {
1273 const float *
src = (
const float *) cvt->
buf;
1281 const float sample = *
src;
1282 if (sample >= 1.0f) {
1284 }
else if (sample <= -1.0f) {
1287 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
1294 if ((((
size_t)
src) & 15) == 0) {
1296 const float32x4_t
one = vdupq_n_f32(1.0f);
1297 const float32x4_t negone = vdupq_n_f32(-1.0f);
1298 const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1301 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby32767));
1302 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby32767));
1303 vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1304 i -= 8;
src += 8; mmdst += 8;
1311 const float sample = *
src;
1312 if (sample >= 1.0f) {
1314 }
else if (sample <= -1.0f) {
1317 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
1331 const float *
src = (
const float *) cvt->
buf;
1339 const float sample = *
src;
1340 if (sample >= 1.0f) {
1342 }
else if (sample <= -1.0f) {
1343 *
dst = (-2147483647) - 1;
1345 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
1354 const float32x4_t
one = vdupq_n_f32(1.0f);
1355 const float32x4_t negone = vdupq_n_f32(-1.0f);
1356 const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
1359 vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby8388607)), 8));
1360 i -= 4;
src += 4; mmdst += 4;
1367 const float sample = *
src;
1368 if (sample >= 1.0f) {
1370 }
else if (sample <= -1.0f) {
1371 *
dst = (-2147483647) - 1;
1373 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
1390 if (converters_chosen) {
/*
 * Point all ten SDL_Convert_* filter pointers at the implementations whose
 * names end in _<fntype>, then set converters_chosen to SDL_TRUE to record
 * that a selection was made.
 *
 * The expansion is a sequence of assignment statements with no trailing
 * semicolon and no do { } while (0) wrapper, so it must be invoked as a full
 * statement inside a braced block, never as the sole body of an unbraced if.
 */
1394#define SET_CONVERTER_FUNCS(fntype) \
1395 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1396 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1397 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1398 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1399 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1400 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1401 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1402 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1403 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1404 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1405 converters_chosen = SDL_TRUE
1407#if HAVE_SSE2_INTRINSICS
1414#if HAVE_NEON_INTRINSICS
1421#if NEED_SCALAR_CONVERTER_FALLBACKS
1425#undef SET_CONVERTER_FUNCS
#define SDL_assert(condition)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Uint16 SDL_AudioFormat
Audio format flags.
#define LOG_DEBUG_CONVERT(from, to)
SDL_AudioFilter SDL_Convert_F32_to_U16
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_S16
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U8
SDL_AudioFilter SDL_Convert_F32_to_S8
SDL_AudioFilter SDL_Convert_S8_to_F32
#define SET_CONVERTER_FUNCS(fntype)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_S32_to_F32
SDL_AudioFilter SDL_Convert_S16_to_F32
SDL_AudioFilter SDL_Convert_U8_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
GLboolean GLboolean GLboolean b
GLboolean GLboolean GLboolean GLboolean a
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
A structure to hold a set of audio conversion filters and buffers.
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]