SDL 2.0
yuv_rgb.c
Go to the documentation of this file.
1// Copyright 2016 Adrien Descamps
2// Distributed under BSD 3-Clause License
3#include "../../SDL_internal.h"
4
5#include "yuv_rgb.h"
6
7#include "SDL_cpuinfo.h"
8/*#include <x86intrin.h>*/
9
// Fixed-point format shared by all conversion coefficients in this file:
// a real coefficient c is stored as c*PRECISION_FACTOR (see the V() macro)
// and results are scaled back by shifting right by PRECISION bits.
#define PRECISION 6
#define PRECISION_FACTOR (1<<PRECISION)
12
// Parameters of one RGB -> YUV conversion, in fixed point (see PRECISION):
//
// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
//
// NOTE(review): member list follows the initializers of RGB2YUV[] (y_shift
// first, then the 3x3 matrix); int16_t for the matrix matches what V()
// produces - confirm against the upstream file.
typedef struct
{
	uint8_t y_shift;
	int16_t matrix[3][3];
} RGB2YUVParam;
21
// Parameters of one YUV -> RGB conversion, in fixed point (see PRECISION):
//
// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
//
// NOTE(review): member order follows the initializers of YUV2RGB[]
// (y_shift, y_factor, v_r_factor, u_g_factor, v_g_factor, u_b_factor) -
// confirm against the upstream file.
typedef struct
{
	uint8_t y_shift;
	int16_t y_factor;
	int16_t v_r_factor;
	int16_t u_g_factor;
	int16_t v_g_factor;
	int16_t u_b_factor;
} YUV2RGBParam;
34
// Convert a real, non-negative coefficient to fixed point: scale by
// PRECISION_FACTOR, add 0.5 and truncate (i.e. round to nearest).
// Negative coefficients are written as -V(magnitude) by the tables below.
// The argument and the whole expansion are parenthesized so that
// expressions such as V(a+b) or x*V(c) expand as expected.
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
36
37// for ITU-T T.871, values can be found in section 7
38// for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
39// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
40// all values are rounded to the fourth decimal
41
42static const YUV2RGBParam YUV2RGB[3] = {
43 // ITU-T T.871 (JPEG)
44 {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
45 // ITU-R BT.601-7
46 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
47 // ITU-R BT.709-6
48 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
49};
50
51static const RGB2YUVParam RGB2YUV[3] = {
52 // ITU-T T.871 (JPEG)
53 {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
54 // ITU-R BT.601-7
55 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
56 // ITU-R BT.709-6
57 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
58};
59
/* The various layouts of YUV data we support.
 * One of these is assigned to YUV_FORMAT before each inclusion of the
 * yuv_rgb_*_func.h templates below.
 * NOTE(review): kept as preprocessor constants - presumably the templates
 * compare them with #if; confirm before turning them into an enum. */
#define YUV_FORMAT_420	1
#define YUV_FORMAT_422	2
#define YUV_FORMAT_NV12	3

/* The various formats of RGB pixel that we support.
 * One of these is assigned to RGB_FORMAT before each template inclusion. */
#define RGB_FORMAT_RGB565	1
#define RGB_FORMAT_RGB24	2
#define RGB_FORMAT_RGBA		3
#define RGB_FORMAT_BGRA		4
#define RGB_FORMAT_ARGB		5
#define RGB_FORMAT_ABGR		6
72
// divide by PRECISION_FACTOR (rounding down) and clamp to [0:255] interval
//
// This used to be a 512-entry lookup table indexed with
// (v+128*PRECISION_FACTOR)>>PRECISION. For every index that table covered
// the result is unchanged; inputs beyond it (including the formerly
// documented upper bound 384*PRECISION_FACTOR, which indexed one element
// past the table) now saturate instead of reading out of bounds.
static uint8_t clampU8(int32_t v)
{
	if(v<0)
	{
		// any negative fixed-point value is below 0 once scaled down
		return 0;
	}
	// v is non-negative here, so the shift is a plain floor division
	v >>= PRECISION;
	return (uint8_t)(v>255 ? 255 : v);
}
94
95
96#define STD_FUNCTION_NAME yuv420_rgb565_std
97#define YUV_FORMAT YUV_FORMAT_420
98#define RGB_FORMAT RGB_FORMAT_RGB565
99#include "yuv_rgb_std_func.h"
100
101#define STD_FUNCTION_NAME yuv420_rgb24_std
102#define YUV_FORMAT YUV_FORMAT_420
103#define RGB_FORMAT RGB_FORMAT_RGB24
104#include "yuv_rgb_std_func.h"
105
106#define STD_FUNCTION_NAME yuv420_rgba_std
107#define YUV_FORMAT YUV_FORMAT_420
108#define RGB_FORMAT RGB_FORMAT_RGBA
109#include "yuv_rgb_std_func.h"
110
111#define STD_FUNCTION_NAME yuv420_bgra_std
112#define YUV_FORMAT YUV_FORMAT_420
113#define RGB_FORMAT RGB_FORMAT_BGRA
114#include "yuv_rgb_std_func.h"
115
116#define STD_FUNCTION_NAME yuv420_argb_std
117#define YUV_FORMAT YUV_FORMAT_420
118#define RGB_FORMAT RGB_FORMAT_ARGB
119#include "yuv_rgb_std_func.h"
120
121#define STD_FUNCTION_NAME yuv420_abgr_std
122#define YUV_FORMAT YUV_FORMAT_420
123#define RGB_FORMAT RGB_FORMAT_ABGR
124#include "yuv_rgb_std_func.h"
125
126#define STD_FUNCTION_NAME yuv422_rgb565_std
127#define YUV_FORMAT YUV_FORMAT_422
128#define RGB_FORMAT RGB_FORMAT_RGB565
129#include "yuv_rgb_std_func.h"
130
131#define STD_FUNCTION_NAME yuv422_rgb24_std
132#define YUV_FORMAT YUV_FORMAT_422
133#define RGB_FORMAT RGB_FORMAT_RGB24
134#include "yuv_rgb_std_func.h"
135
136#define STD_FUNCTION_NAME yuv422_rgba_std
137#define YUV_FORMAT YUV_FORMAT_422
138#define RGB_FORMAT RGB_FORMAT_RGBA
139#include "yuv_rgb_std_func.h"
140
141#define STD_FUNCTION_NAME yuv422_bgra_std
142#define YUV_FORMAT YUV_FORMAT_422
143#define RGB_FORMAT RGB_FORMAT_BGRA
144#include "yuv_rgb_std_func.h"
145
146#define STD_FUNCTION_NAME yuv422_argb_std
147#define YUV_FORMAT YUV_FORMAT_422
148#define RGB_FORMAT RGB_FORMAT_ARGB
149#include "yuv_rgb_std_func.h"
150
151#define STD_FUNCTION_NAME yuv422_abgr_std
152#define YUV_FORMAT YUV_FORMAT_422
153#define RGB_FORMAT RGB_FORMAT_ABGR
154#include "yuv_rgb_std_func.h"
155
156#define STD_FUNCTION_NAME yuvnv12_rgb565_std
157#define YUV_FORMAT YUV_FORMAT_NV12
158#define RGB_FORMAT RGB_FORMAT_RGB565
159#include "yuv_rgb_std_func.h"
160
161#define STD_FUNCTION_NAME yuvnv12_rgb24_std
162#define YUV_FORMAT YUV_FORMAT_NV12
163#define RGB_FORMAT RGB_FORMAT_RGB24
164#include "yuv_rgb_std_func.h"
165
166#define STD_FUNCTION_NAME yuvnv12_rgba_std
167#define YUV_FORMAT YUV_FORMAT_NV12
168#define RGB_FORMAT RGB_FORMAT_RGBA
169#include "yuv_rgb_std_func.h"
170
171#define STD_FUNCTION_NAME yuvnv12_bgra_std
172#define YUV_FORMAT YUV_FORMAT_NV12
173#define RGB_FORMAT RGB_FORMAT_BGRA
174#include "yuv_rgb_std_func.h"
175
176#define STD_FUNCTION_NAME yuvnv12_argb_std
177#define YUV_FORMAT YUV_FORMAT_NV12
178#define RGB_FORMAT RGB_FORMAT_ARGB
179#include "yuv_rgb_std_func.h"
180
181#define STD_FUNCTION_NAME yuvnv12_abgr_std
182#define YUV_FORMAT YUV_FORMAT_NV12
183#define RGB_FORMAT RGB_FORMAT_ABGR
184#include "yuv_rgb_std_func.h"
185
188 const uint8_t *RGB, uint32_t RGB_stride,
189 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
190 YCbCrType yuv_type)
191{
192 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
193
194 uint32_t x, y;
195 for(y=0; y<(height-1); y+=2)
196 {
197 const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
198 *rgb_ptr2=RGB+(y+1)*RGB_stride;
199
200 uint8_t *y_ptr1=Y+y*Y_stride,
201 *y_ptr2=Y+(y+1)*Y_stride,
202 *u_ptr=U+(y/2)*UV_stride,
203 *v_ptr=V+(y/2)*UV_stride;
204
205 for(x=0; x<(width-1); x+=2)
206 {
207 // compute yuv for the four pixels, u and v values are summed
208 int32_t y_tmp, u_tmp, v_tmp;
209
210 y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
211 u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
212 v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
213 y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
214
215 y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
216 u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
217 v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
218 y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
219
220 y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
221 u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
222 v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
223 y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
224
225 y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
226 u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
227 v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
228 y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
229
230 u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
231 v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
232
233 rgb_ptr1 += 6;
234 rgb_ptr2 += 6;
235 y_ptr1 += 2;
236 y_ptr2 += 2;
237 u_ptr += 1;
238 v_ptr += 1;
239 }
240 }
241}
242
243#ifdef __SSE2__
244
245#define SSE_FUNCTION_NAME yuv420_rgb565_sse
246#define STD_FUNCTION_NAME yuv420_rgb565_std
247#define YUV_FORMAT YUV_FORMAT_420
248#define RGB_FORMAT RGB_FORMAT_RGB565
249#define SSE_ALIGNED
250#include "yuv_rgb_sse_func.h"
251
252#define SSE_FUNCTION_NAME yuv420_rgb565_sseu
253#define STD_FUNCTION_NAME yuv420_rgb565_std
254#define YUV_FORMAT YUV_FORMAT_420
255#define RGB_FORMAT RGB_FORMAT_RGB565
256#include "yuv_rgb_sse_func.h"
257
258#define SSE_FUNCTION_NAME yuv420_rgb24_sse
259#define STD_FUNCTION_NAME yuv420_rgb24_std
260#define YUV_FORMAT YUV_FORMAT_420
261#define RGB_FORMAT RGB_FORMAT_RGB24
262#define SSE_ALIGNED
263#include "yuv_rgb_sse_func.h"
264
265#define SSE_FUNCTION_NAME yuv420_rgb24_sseu
266#define STD_FUNCTION_NAME yuv420_rgb24_std
267#define YUV_FORMAT YUV_FORMAT_420
268#define RGB_FORMAT RGB_FORMAT_RGB24
269#include "yuv_rgb_sse_func.h"
270
271#define SSE_FUNCTION_NAME yuv420_rgba_sse
272#define STD_FUNCTION_NAME yuv420_rgba_std
273#define YUV_FORMAT YUV_FORMAT_420
274#define RGB_FORMAT RGB_FORMAT_RGBA
275#define SSE_ALIGNED
276#include "yuv_rgb_sse_func.h"
277
278#define SSE_FUNCTION_NAME yuv420_rgba_sseu
279#define STD_FUNCTION_NAME yuv420_rgba_std
280#define YUV_FORMAT YUV_FORMAT_420
281#define RGB_FORMAT RGB_FORMAT_RGBA
282#include "yuv_rgb_sse_func.h"
283
284#define SSE_FUNCTION_NAME yuv420_bgra_sse
285#define STD_FUNCTION_NAME yuv420_bgra_std
286#define YUV_FORMAT YUV_FORMAT_420
287#define RGB_FORMAT RGB_FORMAT_BGRA
288#define SSE_ALIGNED
289#include "yuv_rgb_sse_func.h"
290
291#define SSE_FUNCTION_NAME yuv420_bgra_sseu
292#define STD_FUNCTION_NAME yuv420_bgra_std
293#define YUV_FORMAT YUV_FORMAT_420
294#define RGB_FORMAT RGB_FORMAT_BGRA
295#include "yuv_rgb_sse_func.h"
296
297#define SSE_FUNCTION_NAME yuv420_argb_sse
298#define STD_FUNCTION_NAME yuv420_argb_std
299#define YUV_FORMAT YUV_FORMAT_420
300#define RGB_FORMAT RGB_FORMAT_ARGB
301#define SSE_ALIGNED
302#include "yuv_rgb_sse_func.h"
303
304#define SSE_FUNCTION_NAME yuv420_argb_sseu
305#define STD_FUNCTION_NAME yuv420_argb_std
306#define YUV_FORMAT YUV_FORMAT_420
307#define RGB_FORMAT RGB_FORMAT_ARGB
308#include "yuv_rgb_sse_func.h"
309
310#define SSE_FUNCTION_NAME yuv420_abgr_sse
311#define STD_FUNCTION_NAME yuv420_abgr_std
312#define YUV_FORMAT YUV_FORMAT_420
313#define RGB_FORMAT RGB_FORMAT_ABGR
314#define SSE_ALIGNED
315#include "yuv_rgb_sse_func.h"
316
317#define SSE_FUNCTION_NAME yuv420_abgr_sseu
318#define STD_FUNCTION_NAME yuv420_abgr_std
319#define YUV_FORMAT YUV_FORMAT_420
320#define RGB_FORMAT RGB_FORMAT_ABGR
321#include "yuv_rgb_sse_func.h"
322
323#define SSE_FUNCTION_NAME yuv422_rgb565_sse
324#define STD_FUNCTION_NAME yuv422_rgb565_std
325#define YUV_FORMAT YUV_FORMAT_422
326#define RGB_FORMAT RGB_FORMAT_RGB565
327#define SSE_ALIGNED
328#include "yuv_rgb_sse_func.h"
329
330#define SSE_FUNCTION_NAME yuv422_rgb565_sseu
331#define STD_FUNCTION_NAME yuv422_rgb565_std
332#define YUV_FORMAT YUV_FORMAT_422
333#define RGB_FORMAT RGB_FORMAT_RGB565
334#include "yuv_rgb_sse_func.h"
335
336#define SSE_FUNCTION_NAME yuv422_rgb24_sse
337#define STD_FUNCTION_NAME yuv422_rgb24_std
338#define YUV_FORMAT YUV_FORMAT_422
339#define RGB_FORMAT RGB_FORMAT_RGB24
340#define SSE_ALIGNED
341#include "yuv_rgb_sse_func.h"
342
343#define SSE_FUNCTION_NAME yuv422_rgb24_sseu
344#define STD_FUNCTION_NAME yuv422_rgb24_std
345#define YUV_FORMAT YUV_FORMAT_422
346#define RGB_FORMAT RGB_FORMAT_RGB24
347#include "yuv_rgb_sse_func.h"
348
349#define SSE_FUNCTION_NAME yuv422_rgba_sse
350#define STD_FUNCTION_NAME yuv422_rgba_std
351#define YUV_FORMAT YUV_FORMAT_422
352#define RGB_FORMAT RGB_FORMAT_RGBA
353#define SSE_ALIGNED
354#include "yuv_rgb_sse_func.h"
355
356#define SSE_FUNCTION_NAME yuv422_rgba_sseu
357#define STD_FUNCTION_NAME yuv422_rgba_std
358#define YUV_FORMAT YUV_FORMAT_422
359#define RGB_FORMAT RGB_FORMAT_RGBA
360#include "yuv_rgb_sse_func.h"
361
362#define SSE_FUNCTION_NAME yuv422_bgra_sse
363#define STD_FUNCTION_NAME yuv422_bgra_std
364#define YUV_FORMAT YUV_FORMAT_422
365#define RGB_FORMAT RGB_FORMAT_BGRA
366#define SSE_ALIGNED
367#include "yuv_rgb_sse_func.h"
368
369#define SSE_FUNCTION_NAME yuv422_bgra_sseu
370#define STD_FUNCTION_NAME yuv422_bgra_std
371#define YUV_FORMAT YUV_FORMAT_422
372#define RGB_FORMAT RGB_FORMAT_BGRA
373#include "yuv_rgb_sse_func.h"
374
375#define SSE_FUNCTION_NAME yuv422_argb_sse
376#define STD_FUNCTION_NAME yuv422_argb_std
377#define YUV_FORMAT YUV_FORMAT_422
378#define RGB_FORMAT RGB_FORMAT_ARGB
379#define SSE_ALIGNED
380#include "yuv_rgb_sse_func.h"
381
382#define SSE_FUNCTION_NAME yuv422_argb_sseu
383#define STD_FUNCTION_NAME yuv422_argb_std
384#define YUV_FORMAT YUV_FORMAT_422
385#define RGB_FORMAT RGB_FORMAT_ARGB
386#include "yuv_rgb_sse_func.h"
387
388#define SSE_FUNCTION_NAME yuv422_abgr_sse
389#define STD_FUNCTION_NAME yuv422_abgr_std
390#define YUV_FORMAT YUV_FORMAT_422
391#define RGB_FORMAT RGB_FORMAT_ABGR
392#define SSE_ALIGNED
393#include "yuv_rgb_sse_func.h"
394
395#define SSE_FUNCTION_NAME yuv422_abgr_sseu
396#define STD_FUNCTION_NAME yuv422_abgr_std
397#define YUV_FORMAT YUV_FORMAT_422
398#define RGB_FORMAT RGB_FORMAT_ABGR
399#include "yuv_rgb_sse_func.h"
400
401#define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
402#define STD_FUNCTION_NAME yuvnv12_rgb565_std
403#define YUV_FORMAT YUV_FORMAT_NV12
404#define RGB_FORMAT RGB_FORMAT_RGB565
405#define SSE_ALIGNED
406#include "yuv_rgb_sse_func.h"
407
408#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
409#define STD_FUNCTION_NAME yuvnv12_rgb565_std
410#define YUV_FORMAT YUV_FORMAT_NV12
411#define RGB_FORMAT RGB_FORMAT_RGB565
412#include "yuv_rgb_sse_func.h"
413
414#define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
415#define STD_FUNCTION_NAME yuvnv12_rgb24_std
416#define YUV_FORMAT YUV_FORMAT_NV12
417#define RGB_FORMAT RGB_FORMAT_RGB24
418#define SSE_ALIGNED
419#include "yuv_rgb_sse_func.h"
420
421#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
422#define STD_FUNCTION_NAME yuvnv12_rgb24_std
423#define YUV_FORMAT YUV_FORMAT_NV12
424#define RGB_FORMAT RGB_FORMAT_RGB24
425#include "yuv_rgb_sse_func.h"
426
427#define SSE_FUNCTION_NAME yuvnv12_rgba_sse
428#define STD_FUNCTION_NAME yuvnv12_rgba_std
429#define YUV_FORMAT YUV_FORMAT_NV12
430#define RGB_FORMAT RGB_FORMAT_RGBA
431#define SSE_ALIGNED
432#include "yuv_rgb_sse_func.h"
433
434#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
435#define STD_FUNCTION_NAME yuvnv12_rgba_std
436#define YUV_FORMAT YUV_FORMAT_NV12
437#define RGB_FORMAT RGB_FORMAT_RGBA
438#include "yuv_rgb_sse_func.h"
439
440#define SSE_FUNCTION_NAME yuvnv12_bgra_sse
441#define STD_FUNCTION_NAME yuvnv12_bgra_std
442#define YUV_FORMAT YUV_FORMAT_NV12
443#define RGB_FORMAT RGB_FORMAT_BGRA
444#define SSE_ALIGNED
445#include "yuv_rgb_sse_func.h"
446
447#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
448#define STD_FUNCTION_NAME yuvnv12_bgra_std
449#define YUV_FORMAT YUV_FORMAT_NV12
450#define RGB_FORMAT RGB_FORMAT_BGRA
451#include "yuv_rgb_sse_func.h"
452
453#define SSE_FUNCTION_NAME yuvnv12_argb_sse
454#define STD_FUNCTION_NAME yuvnv12_argb_std
455#define YUV_FORMAT YUV_FORMAT_NV12
456#define RGB_FORMAT RGB_FORMAT_ARGB
457#define SSE_ALIGNED
458#include "yuv_rgb_sse_func.h"
459
460#define SSE_FUNCTION_NAME yuvnv12_argb_sseu
461#define STD_FUNCTION_NAME yuvnv12_argb_std
462#define YUV_FORMAT YUV_FORMAT_NV12
463#define RGB_FORMAT RGB_FORMAT_ARGB
464#include "yuv_rgb_sse_func.h"
465
466#define SSE_FUNCTION_NAME yuvnv12_abgr_sse
467#define STD_FUNCTION_NAME yuvnv12_abgr_std
468#define YUV_FORMAT YUV_FORMAT_NV12
469#define RGB_FORMAT RGB_FORMAT_ABGR
470#define SSE_ALIGNED
471#include "yuv_rgb_sse_func.h"
472
473#define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
474#define STD_FUNCTION_NAME yuvnv12_abgr_std
475#define YUV_FORMAT YUV_FORMAT_NV12
476#define RGB_FORMAT RGB_FORMAT_ABGR
477#include "yuv_rgb_sse_func.h"
478
479
/*
 * De-interleave packed RGB24 data held in six SSE registers.
 *
 * On entry RGB1..RGB3 hold 48 consecutive bytes (16 pixels) of one line and
 * RGB4..RGB6 hold 48 consecutive bytes of another line. Each STEP performs a
 * byte-wise interleave of the first three registers with the last three;
 * after the five steps of UNPACK_RGB24_32:
 *   R1, G1, B1 = the 16 first/second/third channel bytes of RGB1..RGB3
 *   R2, G2, B2 = the 16 first/second/third channel bytes of RGB4..RGB6
 * both in pixel order. RGB1..RGB6 are used as scratch and are overwritten.
 *
 * All macros expand to complete statements (each ends with ';'), so they
 * are invoked without a trailing semicolon.
 */
#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
B2 = _mm_unpackhi_epi8(RGB3, RGB6);

#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
RGB1 = _mm_unpacklo_epi8(R1, G2); \
RGB2 = _mm_unpackhi_epi8(R1, G2); \
RGB3 = _mm_unpacklo_epi8(R2, B1); \
RGB4 = _mm_unpackhi_epi8(R2, B1); \
RGB5 = _mm_unpacklo_epi8(G1, B2); \
RGB6 = _mm_unpackhi_epi8(G1, B2);

#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2)

/*
 * Helper for RGB2YUV_16: computes one output channel for eight pixels.
 * OUT = (R*matrix[ROW][0] + G*matrix[ROW][1] + B*matrix[ROW][2] + OFFSET) >> PRECISION
 * using 16-bit lanes. Reads `param` from the scope of the invocation.
 */
#define RGB2YUV_16_CHANNEL(R, G, B, OUT, ROW, OFFSET) \
OUT = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[ROW][0])), \
                    _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[ROW][1]))); \
OUT = _mm_add_epi16(OUT, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[ROW][2]))); \
OUT = _mm_add_epi16(OUT, _mm_set1_epi16(OFFSET)); \
OUT = _mm_srai_epi16(OUT, PRECISION);

/*
 * Convert eight pixels from RGB to YUV in fixed point.
 * R, G, B are registers holding eight 16-bit channel values; Y, U, V receive
 * eight 16-bit results each (Y offset by param->y_shift, U and V by 128).
 * Expands to complete statements: invoke without a trailing semicolon.
 * Requires a `param` pointer (with y_shift and matrix members) in scope.
 */
#define RGB2YUV_16(R, G, B, Y, U, V) \
RGB2YUV_16_CHANNEL(R, G, B, Y, 0, (param->y_shift)<<PRECISION) \
RGB2YUV_16_CHANNEL(R, G, B, U, 1, 128<<PRECISION) \
RGB2YUV_16_CHANNEL(R, G, B, V, 2, 128<<PRECISION)
519
/*
 * Helper for RGB2YUV_32: converts the 16 pixels of one line whose channels
 * are held as bytes in R8, G8, B8. The 16 luma bytes are stored to Y_DST,
 * the 16 (not yet subsampled) chroma bytes are left in U8 and V8.
 * Uses the temporaries declared by RGB2YUV_32.
 */
#define RGB2YUV_32_LINE(R8, G8, B8, Y_DST, U8, V8) \
	/* low eight pixels, widened to 16 bits */ \
	r_16 = _mm_unpacklo_epi8(R8, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(G8, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(B8, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	/* high eight pixels */ \
	r_16 = _mm_unpackhi_epi8(R8, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(G8, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(B8, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	/* back to bytes, saturating to [0:255] */ \
	y = _mm_packus_epi16(y1_16, y2_16); \
	U8 = _mm_packus_epi16(u1_16, u2_16); \
	V8 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(Y_DST), y);

/*
 * Helper for RGB2YUV_32: processes a block of 16 pixels on two lines,
 * starting RGB_OFFSET bytes after rgb_ptr1/rgb_ptr2 and Y_OFFSET bytes
 * after y_ptr1/y_ptr2. U_AVG and V_AVG receive the chroma of the two lines
 * averaged vertically (still one value per pixel column).
 */
#define RGB2YUV_32_BLOCK(RGB_OFFSET, Y_OFFSET, U_AVG, V_AVG) \
	rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+(RGB_OFFSET))); \
	rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+(RGB_OFFSET)+16)); \
	rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+(RGB_OFFSET)+32)); \
	rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+(RGB_OFFSET))); \
	rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+(RGB_OFFSET)+16)); \
	rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+(RGB_OFFSET)+32)); \
	/* unpack rgb24 data to r, g and b data in separate channels*/ \
	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
	/* process pixels of first line */ \
	RGB2YUV_32_LINE(r1, g1, b1, y_ptr1+(Y_OFFSET), u1, v1) \
	/* process pixels of second line */ \
	RGB2YUV_32_LINE(r2, g2, b2, y_ptr2+(Y_OFFSET), u2, v2) \
	/* vertical subsampling of u/v values */ \
	U_AVG = _mm_avg_epu8(u1, u2); \
	V_AVG = _mm_avg_epu8(v1, v2);

/*
 * Convert a block of 32x2 RGB24 pixels to YUV 4:2:0.
 *
 * Expects in scope: rgb_ptr1/rgb_ptr2 (two source lines, 96 bytes read from
 * each), y_ptr1/y_ptr2 (32 bytes written to each), u_ptr/v_ptr (16 bytes
 * written to each), `param`, and the LOAD_SI128/SAVE_SI128 macros selecting
 * the load/store intrinsics. Declares its own temporaries, so it must be
 * expanded at the start of a block and only once per block.
 */
#define RGB2YUV_32 \
	__m128i r1, r2, b1, b2, g1, g2; \
	__m128i r_16, g_16, b_16; \
	__m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
	__m128i rgb1, rgb2, rgb3, rgb4, rgb5, rgb6; \
	/* pixels 0..15, then pixels 16..31 */ \
	RGB2YUV_32_BLOCK(0, 0, u1_tmp, v1_tmp) \
	RGB2YUV_32_BLOCK(48, 16, u2_tmp, v2_tmp) \
	/* horizontal subsampling of u/v values: split odd columns (high bytes) */ \
	/* from even columns (low bytes), then average the two */ \
	u1 = _mm_packus_epi16(_mm_srli_epi16(u1_tmp, 8), _mm_srli_epi16(u2_tmp, 8)); \
	v1 = _mm_packus_epi16(_mm_srli_epi16(v1_tmp, 8), _mm_srli_epi16(v2_tmp, 8)); \
	u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
	v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
	u1 = _mm_avg_epu8(u1, u2); \
	v1 = _mm_avg_epu8(v1, v2); \
	SAVE_SI128((__m128i*)(u_ptr), u1); \
	SAVE_SI128((__m128i*)(v_ptr), v1);
612
614 const uint8_t *RGB, uint32_t RGB_stride,
615 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
616 YCbCrType yuv_type)
617{
618 #define LOAD_SI128 _mm_load_si128
619 #define SAVE_SI128 _mm_stream_si128
620 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
621
622 uint32_t xpos, ypos;
623 for(ypos=0; ypos<(height-1); ypos+=2)
624 {
625 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
626 *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
627
628 uint8_t *y_ptr1=Y+ypos*Y_stride,
629 *y_ptr2=Y+(ypos+1)*Y_stride,
630 *u_ptr=U+(ypos/2)*UV_stride,
631 *v_ptr=V+(ypos/2)*UV_stride;
632
633 for(xpos=0; xpos<(width-31); xpos+=32)
634 {
635 RGB2YUV_32
636
637 rgb_ptr1+=96;
638 rgb_ptr2+=96;
639 y_ptr1+=32;
640 y_ptr2+=32;
641 u_ptr+=16;
642 v_ptr+=16;
643 }
644 }
645 #undef LOAD_SI128
646 #undef SAVE_SI128
647}
648
650 const uint8_t *RGB, uint32_t RGB_stride,
651 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
652 YCbCrType yuv_type)
653{
654 #define LOAD_SI128 _mm_loadu_si128
655 #define SAVE_SI128 _mm_storeu_si128
656 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
657
658 uint32_t xpos, ypos;
659 for(ypos=0; ypos<(height-1); ypos+=2)
660 {
661 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
662 *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
663
664 uint8_t *y_ptr1=Y+ypos*Y_stride,
665 *y_ptr2=Y+(ypos+1)*Y_stride,
666 *u_ptr=U+(ypos/2)*UV_stride,
667 *v_ptr=V+(ypos/2)*UV_stride;
668
669 for(xpos=0; xpos<(width-31); xpos+=32)
670 {
671 RGB2YUV_32
672
673 rgb_ptr1+=96;
674 rgb_ptr2+=96;
675 y_ptr1+=32;
676 y_ptr2+=32;
677 u_ptr+=16;
678 v_ptr+=16;
679 }
680 }
681 #undef LOAD_SI128
682 #undef SAVE_SI128
683}
684
685
686#endif //__SSE2__
687
signed short int16_t
unsigned int uint32_t
signed int int32_t
unsigned char uint8_t
GLint GLint GLint GLint GLint GLint y
Definition: SDL_opengl.h:1574
const GLdouble * v
Definition: SDL_opengl.h:2064
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
GLfloat param
GLuint GLenum matrix
@ RGB
Definition: edid.h:20
uint8_t y_shift
Definition: yuv_rgb.c:15
int16_t y_factor
Definition: yuv_rgb.c:25
int16_t v_r_factor
Definition: yuv_rgb.c:26
int16_t v_g_factor
Definition: yuv_rgb.c:28
int16_t u_g_factor
Definition: yuv_rgb.c:27
uint8_t y_shift
Definition: yuv_rgb.c:24
int16_t u_b_factor
Definition: yuv_rgb.c:29
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:42
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
Definition: yuv_rgb.c:186
static uint8_t clampU8(int32_t v)
Definition: yuv_rgb.c:75
static const RGB2YUVParam RGB2YUV[3]
Definition: yuv_rgb.c:51
#define PRECISION
Definition: yuv_rgb.c:10
#define PRECISION_FACTOR
Definition: yuv_rgb.c:11
#define V(value)
Definition: yuv_rgb.c:35
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
YCbCrType
Definition: yuv_rgb.h:23