SDL 2.0
SDL_blit_A.c
Go to the documentation of this file.
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
21#include "../SDL_internal.h"
22
23#include "SDL_video.h"
24#include "SDL_blit.h"
25
26/* Functions to perform alpha blended blitting */
27
28/* N->1 blending with per-surface alpha */
29static void
31{
32 int width = info->dst_w;
33 int height = info->dst_h;
34 Uint8 *src = info->src;
35 int srcskip = info->src_skip;
36 Uint8 *dst = info->dst;
37 int dstskip = info->dst_skip;
38 Uint8 *palmap = info->table;
39 SDL_PixelFormat *srcfmt = info->src_fmt;
40 SDL_PixelFormat *dstfmt = info->dst_fmt;
41 int srcbpp = srcfmt->BytesPerPixel;
42 Uint32 Pixel;
43 unsigned sR, sG, sB;
44 unsigned dR, dG, dB;
45 const unsigned A = info->a;
46
47 while (height--) {
48 /* *INDENT-OFF* */
50 {
51 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
52 dR = dstfmt->palette->colors[*dst].r;
53 dG = dstfmt->palette->colors[*dst].g;
54 dB = dstfmt->palette->colors[*dst].b;
55 ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
56 dR &= 0xff;
57 dG &= 0xff;
58 dB &= 0xff;
59 /* Pack RGB into 8bit pixel */
60 if ( palmap == NULL ) {
61 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
62 } else {
63 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
64 }
65 dst++;
66 src += srcbpp;
67 },
68 width);
69 /* *INDENT-ON* */
70 src += srcskip;
71 dst += dstskip;
72 }
73}
74
75/* N->1 blending with pixel alpha */
76static void
78{
79 int width = info->dst_w;
80 int height = info->dst_h;
81 Uint8 *src = info->src;
82 int srcskip = info->src_skip;
83 Uint8 *dst = info->dst;
84 int dstskip = info->dst_skip;
85 Uint8 *palmap = info->table;
86 SDL_PixelFormat *srcfmt = info->src_fmt;
87 SDL_PixelFormat *dstfmt = info->dst_fmt;
88 int srcbpp = srcfmt->BytesPerPixel;
89 Uint32 Pixel;
90 unsigned sR, sG, sB, sA;
91 unsigned dR, dG, dB;
92
93 while (height--) {
94 /* *INDENT-OFF* */
96 {
97 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
98 dR = dstfmt->palette->colors[*dst].r;
99 dG = dstfmt->palette->colors[*dst].g;
100 dB = dstfmt->palette->colors[*dst].b;
101 ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB);
102 dR &= 0xff;
103 dG &= 0xff;
104 dB &= 0xff;
105 /* Pack RGB into 8bit pixel */
106 if ( palmap == NULL ) {
107 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
108 } else {
109 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
110 }
111 dst++;
112 src += srcbpp;
113 },
114 width);
115 /* *INDENT-ON* */
116 src += srcskip;
117 dst += dstskip;
118 }
119}
120
121/* colorkeyed N->1 blending with per-surface alpha */
122static void
124{
125 int width = info->dst_w;
126 int height = info->dst_h;
127 Uint8 *src = info->src;
128 int srcskip = info->src_skip;
129 Uint8 *dst = info->dst;
130 int dstskip = info->dst_skip;
131 Uint8 *palmap = info->table;
132 SDL_PixelFormat *srcfmt = info->src_fmt;
133 SDL_PixelFormat *dstfmt = info->dst_fmt;
134 int srcbpp = srcfmt->BytesPerPixel;
135 Uint32 ckey = info->colorkey;
136 Uint32 Pixel;
137 unsigned sR, sG, sB;
138 unsigned dR, dG, dB;
139 const unsigned A = info->a;
140
141 while (height--) {
142 /* *INDENT-OFF* */
144 {
145 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
146 if ( Pixel != ckey ) {
147 dR = dstfmt->palette->colors[*dst].r;
148 dG = dstfmt->palette->colors[*dst].g;
149 dB = dstfmt->palette->colors[*dst].b;
150 ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
151 dR &= 0xff;
152 dG &= 0xff;
153 dB &= 0xff;
154 /* Pack RGB into 8bit pixel */
155 if ( palmap == NULL ) {
156 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
157 } else {
158 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
159 }
160 }
161 dst++;
162 src += srcbpp;
163 },
164 width);
165 /* *INDENT-ON* */
166 src += srcskip;
167 dst += dstskip;
168 }
169}
170
171#ifdef __MMX__
172
173/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
174static void
175BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
176{
177 int width = info->dst_w;
178 int height = info->dst_h;
179 Uint32 *srcp = (Uint32 *) info->src;
180 int srcskip = info->src_skip >> 2;
181 Uint32 *dstp = (Uint32 *) info->dst;
182 int dstskip = info->dst_skip >> 2;
183 Uint32 dalpha = info->dst_fmt->Amask;
184
185 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
186
187 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
188 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
189 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
190
191 while (height--) {
192 int n = width;
193 if (n & 1) {
194 Uint32 s = *srcp++;
195 Uint32 d = *dstp;
196 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197 + (s & d & 0x00010101)) | dalpha;
198 n--;
199 }
200
201 for (n >>= 1; n > 0; --n) {
202 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
203 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
204
205 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
206 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
207
208 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
209 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
210 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
211 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
212
213 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
214 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
215 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
216 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
217
218 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
219 dstp += 2;
220 srcp += 2;
221 }
222
223 srcp += srcskip;
224 dstp += dstskip;
225 }
226 _mm_empty();
227}
228
229/* fast RGB888->(A)RGB888 blending with surface alpha */
230static void
231BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
232{
233 SDL_PixelFormat *df = info->dst_fmt;
234 Uint32 chanmask;
235 unsigned alpha = info->a;
236
237 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
238 /* only call a128 version when R,G,B occupy lower bits */
239 BlitRGBtoRGBSurfaceAlpha128MMX(info);
240 } else {
241 int width = info->dst_w;
242 int height = info->dst_h;
243 Uint32 *srcp = (Uint32 *) info->src;
244 int srcskip = info->src_skip >> 2;
245 Uint32 *dstp = (Uint32 *) info->dst;
246 int dstskip = info->dst_skip >> 2;
247 Uint32 dalpha = df->Amask;
248 Uint32 amult;
249
250 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
251
252 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
253 /* form the alpha mult */
254 amult = alpha | (alpha << 8);
255 amult = amult | (amult << 16);
256 chanmask =
257 (0xff << df->Rshift) | (0xff << df->
258 Gshift) | (0xff << df->Bshift);
259 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
260 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
261 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
262 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
263
264 while (height--) {
265 int n = width;
266 if (n & 1) {
267 /* One Pixel Blend */
268 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
269 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
270
271 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
272 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
273
274 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
275 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
276 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
277 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
278
279 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
280 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
281 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
282
283 ++srcp;
284 ++dstp;
285
286 n--;
287 }
288
289 for (n >>= 1; n > 0; --n) {
290 /* Two Pixels Blend */
291 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
292 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
293 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
294 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
295
296 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
297 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
298 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
299 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
300
301 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
302 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
303 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
304 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
305
306 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
307 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
308 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
309 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
310
311 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
312 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
313
314 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
315
316 srcp += 2;
317 dstp += 2;
318 }
319 srcp += srcskip;
320 dstp += dstskip;
321 }
322 _mm_empty();
323 }
324}
325
326/* fast ARGB888->(A)RGB888 blending with pixel alpha */
327static void
328BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
329{
330 int width = info->dst_w;
331 int height = info->dst_h;
332 Uint32 *srcp = (Uint32 *) info->src;
333 int srcskip = info->src_skip >> 2;
334 Uint32 *dstp = (Uint32 *) info->dst;
335 int dstskip = info->dst_skip >> 2;
336 SDL_PixelFormat *sf = info->src_fmt;
337 Uint32 amask = sf->Amask;
338 Uint32 ashift = sf->Ashift;
339 Uint64 multmask, multmask2;
340
341 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
342
343 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
344 multmask = 0x00FF;
345 multmask <<= (ashift * 2);
346 multmask2 = 0x00FF00FF00FF00FFULL;
347
348 while (height--) {
349 /* *INDENT-OFF* */
351 Uint32 alpha = *srcp & amask;
352 if (alpha == 0) {
353 /* do nothing */
354 } else if (alpha == amask) {
355 *dstp = *srcp;
356 } else {
357 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
358 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
359
360 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
361 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
362
363 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
364 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
365 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
366 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
367 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */
368 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */
369
370 /* blend */
371 src1 = _mm_mullo_pi16(src1, mm_alpha);
372 src1 = _mm_srli_pi16(src1, 8);
373 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
374 dst1 = _mm_srli_pi16(dst1, 8);
375 dst1 = _mm_add_pi16(src1, dst1);
376 dst1 = _mm_packs_pu16(dst1, mm_zero);
377
378 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
379 }
380 ++srcp;
381 ++dstp;
382 }, width);
383 /* *INDENT-ON* */
384 srcp += srcskip;
385 dstp += dstskip;
386 }
387 _mm_empty();
388}
389
390#endif /* __MMX__ */
391
392/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
393static void
395{
396 int width = info->dst_w;
397 int height = info->dst_h;
398 Uint32 *srcp = (Uint32 *) info->src;
399 int srcskip = info->src_skip >> 2;
400 Uint32 *dstp = (Uint32 *) info->dst;
401 int dstskip = info->dst_skip >> 2;
402
403 while (height--) {
404 /* *INDENT-OFF* */
406 Uint32 s = *srcp++;
407 Uint32 d = *dstp;
408 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
409 + (s & d & 0x00010101)) | 0xff000000;
410 }, width);
411 /* *INDENT-ON* */
412 srcp += srcskip;
413 dstp += dstskip;
414 }
415}
416
417/* fast RGB888->(A)RGB888 blending with surface alpha */
418static void
420{
421 unsigned alpha = info->a;
422 if (alpha == 128) {
424 } else {
425 int width = info->dst_w;
426 int height = info->dst_h;
427 Uint32 *srcp = (Uint32 *) info->src;
428 int srcskip = info->src_skip >> 2;
429 Uint32 *dstp = (Uint32 *) info->dst;
430 int dstskip = info->dst_skip >> 2;
431 Uint32 s;
432 Uint32 d;
433 Uint32 s1;
434 Uint32 d1;
435
436 while (height--) {
437 /* *INDENT-OFF* */
439 s = *srcp;
440 d = *dstp;
441 s1 = s & 0xff00ff;
442 d1 = d & 0xff00ff;
443 d1 = (d1 + ((s1 - d1) * alpha >> 8))
444 & 0xff00ff;
445 s &= 0xff00;
446 d &= 0xff00;
447 d = (d + ((s - d) * alpha >> 8)) & 0xff00;
448 *dstp = d1 | d | 0xff000000;
449 ++srcp;
450 ++dstp;
451 }, width);
452 /* *INDENT-ON* */
453 srcp += srcskip;
454 dstp += dstskip;
455 }
456 }
457}
458
459/* fast ARGB888->(A)RGB888 blending with pixel alpha */
460static void
462{
463 int width = info->dst_w;
464 int height = info->dst_h;
465 Uint32 *srcp = (Uint32 *) info->src;
466 int srcskip = info->src_skip >> 2;
467 Uint32 *dstp = (Uint32 *) info->dst;
468 int dstskip = info->dst_skip >> 2;
469
470 while (height--) {
471 /* *INDENT-OFF* */
473 Uint32 dalpha;
474 Uint32 d;
475 Uint32 s1;
476 Uint32 d1;
477 Uint32 s = *srcp;
478 Uint32 alpha = s >> 24;
479 /* FIXME: Here we special-case opaque alpha since the
480 compositioning used (>>8 instead of /255) doesn't handle
481 it correctly. Also special-case alpha=0 for speed?
482 Benchmark this! */
483 if (alpha) {
484 if (alpha == SDL_ALPHA_OPAQUE) {
485 *dstp = *srcp;
486 } else {
487 /*
488 * take out the middle component (green), and process
489 * the other two in parallel. One multiply less.
490 */
491 d = *dstp;
492 dalpha = d >> 24;
493 s1 = s & 0xff00ff;
494 d1 = d & 0xff00ff;
495 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
496 s &= 0xff00;
497 d &= 0xff00;
498 d = (d + ((s - d) * alpha >> 8)) & 0xff00;
499 dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
500 *dstp = d1 | d | (dalpha << 24);
501 }
502 }
503 ++srcp;
504 ++dstp;
505 }, width);
506 /* *INDENT-ON* */
507 srcp += srcskip;
508 dstp += dstskip;
509 }
510}
511
#ifdef __3dNOW__
/*
 * Fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel
 * alpha.
 *
 * Same arithmetic as the plain MMX per-pixel-alpha routine, with a
 * prefetch of srcp + 16 and dstp + 16 issued before each pixel.  Alpha 0
 * leaves the destination alone, full alpha copies the source pixel,
 * anything else computes (src * a >> 8) + (dst * (255 - a) >> 8) on 16-bit
 * lanes with the multiplier lane at the source alpha position forced to
 * 0xFF.
 */
static void
BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *sf = info->src_fmt;
    Uint32 amask = sf->Amask;
    Uint32 ashift = sf->Ashift;
    Uint64 multmask, multmask2;

    __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;

    mm_zero = _mm_setzero_si64();   /* 0 -> mm_zero */
    multmask = 0x00FF;
    multmask <<= (ashift * 2);      /* byte n of a pixel sits in 16-bit lane n once unpacked */
    multmask2 = 0x00FF00FF00FF00FFULL;

    while (height--) {
        /* *INDENT-OFF* */
        DUFFS_LOOP4({
        Uint32 alpha;

        _m_prefetch(srcp + 16);
        _m_prefetch(dstp + 16);

        alpha = *srcp & amask;
        if (alpha == 0) {
            /* do nothing */
        } else if (alpha == amask) {
            *dstp = *srcp;
        } else {
            src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
            src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */

            dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
            dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */

            mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
            mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
            mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
            mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
            mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);    /* 0F0A0A0A -> mm_alpha */
            mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha2 -> mm_alpha2 */


            /* blend */
            src1 = _mm_mullo_pi16(src1, mm_alpha);
            src1 = _mm_srli_pi16(src1, 8);
            dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
            dst1 = _mm_srli_pi16(dst1, 8);
            dst1 = _mm_add_pi16(src1, dst1);
            dst1 = _mm_packs_pu16(dst1, mm_zero);

            *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
        }
        ++srcp;
        ++dstp;
        }, width);
        /* *INDENT-ON* */
        srcp += srcskip;
        dstp += dstskip;
    }
    _mm_empty();
}

#endif /* __3dNOW__ */

585/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
586
587/* blend a single 16 bit pixel at 50% */
588#define BLEND16_50(d, s, mask) \
589 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff)))
590
591/* blend two 16 bit pixels at 50% */
592#define BLEND2x16_50(d, s, mask) \
593 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
594 + (s & d & (~(mask | mask << 16))))
595
596static void
598{
599 int width = info->dst_w;
600 int height = info->dst_h;
601 Uint16 *srcp = (Uint16 *) info->src;
602 int srcskip = info->src_skip >> 1;
603 Uint16 *dstp = (Uint16 *) info->dst;
604 int dstskip = info->dst_skip >> 1;
605
606 while (height--) {
607 if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
608 /*
609 * Source and destination not aligned, pipeline it.
610 * This is mostly a win for big blits but no loss for
611 * small ones
612 */
613 Uint32 prev_sw;
614 int w = width;
615
616 /* handle odd destination */
617 if ((uintptr_t) dstp & 2) {
618 Uint16 d = *dstp, s = *srcp;
619 *dstp = BLEND16_50(d, s, mask);
620 dstp++;
621 srcp++;
622 w--;
623 }
624 srcp++; /* srcp is now 32-bit aligned */
625
626 /* bootstrap pipeline with first halfword */
627 prev_sw = ((Uint32 *) srcp)[-1];
628
629 while (w > 1) {
630 Uint32 sw, dw, s;
631 sw = *(Uint32 *) srcp;
632 dw = *(Uint32 *) dstp;
633#if SDL_BYTEORDER == SDL_BIG_ENDIAN
634 s = (prev_sw << 16) + (sw >> 16);
635#else
636 s = (prev_sw >> 16) + (sw << 16);
637#endif
638 prev_sw = sw;
639 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
640 dstp += 2;
641 srcp += 2;
642 w -= 2;
643 }
644
645 /* final pixel if any */
646 if (w) {
647 Uint16 d = *dstp, s;
648#if SDL_BYTEORDER == SDL_BIG_ENDIAN
649 s = (Uint16) prev_sw;
650#else
651 s = (Uint16) (prev_sw >> 16);
652#endif
653 *dstp = BLEND16_50(d, s, mask);
654 srcp++;
655 dstp++;
656 }
657 srcp += srcskip - 1;
658 dstp += dstskip;
659 } else {
660 /* source and destination are aligned */
661 int w = width;
662
663 /* first odd pixel? */
664 if ((uintptr_t) srcp & 2) {
665 Uint16 d = *dstp, s = *srcp;
666 *dstp = BLEND16_50(d, s, mask);
667 srcp++;
668 dstp++;
669 w--;
670 }
671 /* srcp and dstp are now 32-bit aligned */
672
673 while (w > 1) {
674 Uint32 sw = *(Uint32 *) srcp;
675 Uint32 dw = *(Uint32 *) dstp;
676 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
677 srcp += 2;
678 dstp += 2;
679 w -= 2;
680 }
681
682 /* last odd pixel? */
683 if (w) {
684 Uint16 d = *dstp, s = *srcp;
685 *dstp = BLEND16_50(d, s, mask);
686 srcp++;
687 dstp++;
688 }
689 srcp += srcskip;
690 dstp += dstskip;
691 }
692 }
693}
694
695#ifdef __MMX__
696
697/* fast RGB565->RGB565 blending with surface alpha */
698static void
699Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
700{
701 unsigned alpha = info->a;
702 if (alpha == 128) {
703 Blit16to16SurfaceAlpha128(info, 0xf7de);
704 } else {
705 int width = info->dst_w;
706 int height = info->dst_h;
707 Uint16 *srcp = (Uint16 *) info->src;
708 int srcskip = info->src_skip >> 1;
709 Uint16 *dstp = (Uint16 *) info->dst;
710 int dstskip = info->dst_skip >> 1;
711 Uint32 s, d;
712
713 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
714
715 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
716 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
717 alpha >>= 3; /* downscale alpha to 5 bits */
718
719 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
720 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
721 /* position alpha to allow for mullo and mulhi on diff channels
722 to reduce the number of operations */
723 mm_alpha = _mm_slli_si64(mm_alpha, 3);
724
725 /* Setup the 565 color channel masks */
726 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
727 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
728
729 while (height--) {
730 /* *INDENT-OFF* */
732 {
733 s = *srcp++;
734 d = *dstp;
735 /*
736 * shift out the middle component (green) to
737 * the high 16 bits, and process all three RGB
738 * components at the same time.
739 */
740 s = (s | s << 16) & 0x07e0f81f;
741 d = (d | d << 16) & 0x07e0f81f;
742 d += (s - d) * alpha >> 5;
743 d &= 0x07e0f81f;
744 *dstp++ = (Uint16)(d | d >> 16);
745 },{
746 s = *srcp++;
747 d = *dstp;
748 /*
749 * shift out the middle component (green) to
750 * the high 16 bits, and process all three RGB
751 * components at the same time.
752 */
753 s = (s | s << 16) & 0x07e0f81f;
754 d = (d | d << 16) & 0x07e0f81f;
755 d += (s - d) * alpha >> 5;
756 d &= 0x07e0f81f;
757 *dstp++ = (Uint16)(d | d >> 16);
758 s = *srcp++;
759 d = *dstp;
760 /*
761 * shift out the middle component (green) to
762 * the high 16 bits, and process all three RGB
763 * components at the same time.
764 */
765 s = (s | s << 16) & 0x07e0f81f;
766 d = (d | d << 16) & 0x07e0f81f;
767 d += (s - d) * alpha >> 5;
768 d &= 0x07e0f81f;
769 *dstp++ = (Uint16)(d | d >> 16);
770 },{
771 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
772 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
773
774 /* red */
775 src2 = src1;
776 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */
777
778 dst2 = dst1;
779 dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */
780
781 /* blend */
782 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
783 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
784 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
785 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
786 dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */
787
788 mm_res = dst2; /* RED -> mm_res */
789
790 /* green -- process the bits in place */
791 src2 = src1;
792 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
793
794 dst2 = dst1;
795 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
796
797 /* blend */
798 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
799 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
800 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
801 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
802
803 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
804
805 /* blue */
806 src2 = src1;
807 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
808
809 dst2 = dst1;
810 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
811
812 /* blend */
813 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
814 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
815 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
816 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
817 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
818
819 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
820
821 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
822
823 srcp += 4;
824 dstp += 4;
825 }, width);
826 /* *INDENT-ON* */
827 srcp += srcskip;
828 dstp += dstskip;
829 }
830 _mm_empty();
831 }
832}
833
834/* fast RGB555->RGB555 blending with surface alpha */
835static void
836Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
837{
838 unsigned alpha = info->a;
839 if (alpha == 128) {
840 Blit16to16SurfaceAlpha128(info, 0xfbde);
841 } else {
842 int width = info->dst_w;
843 int height = info->dst_h;
844 Uint16 *srcp = (Uint16 *) info->src;
845 int srcskip = info->src_skip >> 1;
846 Uint16 *dstp = (Uint16 *) info->dst;
847 int dstskip = info->dst_skip >> 1;
848 Uint32 s, d;
849
850 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
851
852 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
853 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
854 alpha >>= 3; /* downscale alpha to 5 bits */
855
856 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
857 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
858 /* position alpha to allow for mullo and mulhi on diff channels
859 to reduce the number of operations */
860 mm_alpha = _mm_slli_si64(mm_alpha, 3);
861
862 /* Setup the 555 color channel masks */
863 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
864 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
865 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
866
867 while (height--) {
868 /* *INDENT-OFF* */
870 {
871 s = *srcp++;
872 d = *dstp;
873 /*
874 * shift out the middle component (green) to
875 * the high 16 bits, and process all three RGB
876 * components at the same time.
877 */
878 s = (s | s << 16) & 0x03e07c1f;
879 d = (d | d << 16) & 0x03e07c1f;
880 d += (s - d) * alpha >> 5;
881 d &= 0x03e07c1f;
882 *dstp++ = (Uint16)(d | d >> 16);
883 },{
884 s = *srcp++;
885 d = *dstp;
886 /*
887 * shift out the middle component (green) to
888 * the high 16 bits, and process all three RGB
889 * components at the same time.
890 */
891 s = (s | s << 16) & 0x03e07c1f;
892 d = (d | d << 16) & 0x03e07c1f;
893 d += (s - d) * alpha >> 5;
894 d &= 0x03e07c1f;
895 *dstp++ = (Uint16)(d | d >> 16);
896 s = *srcp++;
897 d = *dstp;
898 /*
899 * shift out the middle component (green) to
900 * the high 16 bits, and process all three RGB
901 * components at the same time.
902 */
903 s = (s | s << 16) & 0x03e07c1f;
904 d = (d | d << 16) & 0x03e07c1f;
905 d += (s - d) * alpha >> 5;
906 d &= 0x03e07c1f;
907 *dstp++ = (Uint16)(d | d >> 16);
908 },{
909 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
910 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
911
912 /* red -- process the bits in place */
913 src2 = src1;
914 src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */
915
916 dst2 = dst1;
917 dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */
918
919 /* blend */
920 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
921 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
922 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
923 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
924 dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */
925
926 mm_res = dst2; /* RED -> mm_res */
927
928 /* green -- process the bits in place */
929 src2 = src1;
930 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
931
932 dst2 = dst1;
933 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
934
935 /* blend */
936 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
937 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
938 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
939 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
940
941 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
942
943 /* blue */
944 src2 = src1; /* src -> src2 */
945 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
946
947 dst2 = dst1; /* dst -> dst2 */
948 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
949
950 /* blend */
951 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
952 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
953 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
954 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
955 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
956
957 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
958
959 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
960
961 srcp += 4;
962 dstp += 4;
963 }, width);
964 /* *INDENT-ON* */
965 srcp += srcskip;
966 dstp += dstskip;
967 }
968 _mm_empty();
969 }
970}
971
972#endif /* __MMX__ */
973
974/* fast RGB565->RGB565 blending with surface alpha */
975static void
977{
978 unsigned alpha = info->a;
979 if (alpha == 128) {
980 Blit16to16SurfaceAlpha128(info, 0xf7de);
981 } else {
982 int width = info->dst_w;
983 int height = info->dst_h;
984 Uint16 *srcp = (Uint16 *) info->src;
985 int srcskip = info->src_skip >> 1;
986 Uint16 *dstp = (Uint16 *) info->dst;
987 int dstskip = info->dst_skip >> 1;
988 alpha >>= 3; /* downscale alpha to 5 bits */
989
990 while (height--) {
991 /* *INDENT-OFF* */
993 Uint32 s = *srcp++;
994 Uint32 d = *dstp;
995 /*
996 * shift out the middle component (green) to
997 * the high 16 bits, and process all three RGB
998 * components at the same time.
999 */
1000 s = (s | s << 16) & 0x07e0f81f;
1001 d = (d | d << 16) & 0x07e0f81f;
1002 d += (s - d) * alpha >> 5;
1003 d &= 0x07e0f81f;
1004 *dstp++ = (Uint16)(d | d >> 16);
1005 }, width);
1006 /* *INDENT-ON* */
1007 srcp += srcskip;
1008 dstp += dstskip;
1009 }
1010 }
1011}
1012
1013/* fast RGB555->RGB555 blending with surface alpha */
1014static void
1016{
1017 unsigned alpha = info->a; /* downscale alpha to 5 bits */
1018 if (alpha == 128) {
1019 Blit16to16SurfaceAlpha128(info, 0xfbde);
1020 } else {
1021 int width = info->dst_w;
1022 int height = info->dst_h;
1023 Uint16 *srcp = (Uint16 *) info->src;
1024 int srcskip = info->src_skip >> 1;
1025 Uint16 *dstp = (Uint16 *) info->dst;
1026 int dstskip = info->dst_skip >> 1;
1027 alpha >>= 3; /* downscale alpha to 5 bits */
1028
1029 while (height--) {
1030 /* *INDENT-OFF* */
1031 DUFFS_LOOP4({
1032 Uint32 s = *srcp++;
1033 Uint32 d = *dstp;
1034 /*
1035 * shift out the middle component (green) to
1036 * the high 16 bits, and process all three RGB
1037 * components at the same time.
1038 */
1039 s = (s | s << 16) & 0x03e07c1f;
1040 d = (d | d << 16) & 0x03e07c1f;
1041 d += (s - d) * alpha >> 5;
1042 d &= 0x03e07c1f;
1043 *dstp++ = (Uint16)(d | d >> 16);
1044 }, width);
1045 /* *INDENT-ON* */
1046 srcp += srcskip;
1047 dstp += dstskip;
1048 }
1049 }
1050}
1051
1052/* fast ARGB8888->RGB565 blending with pixel alpha */
1053static void
1055{
1056 int width = info->dst_w;
1057 int height = info->dst_h;
1058 Uint32 *srcp = (Uint32 *) info->src;
1059 int srcskip = info->src_skip >> 2;
1060 Uint16 *dstp = (Uint16 *) info->dst;
1061 int dstskip = info->dst_skip >> 1;
1062
1063 while (height--) {
1064 /* *INDENT-OFF* */
1065 DUFFS_LOOP4({
1066 Uint32 s = *srcp;
1067 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
1068 /* FIXME: Here we special-case opaque alpha since the
1069 compositioning used (>>8 instead of /255) doesn't handle
1070 it correctly. Also special-case alpha=0 for speed?
1071 Benchmark this! */
1072 if(alpha) {
1073 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1074 *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1075 } else {
1076 Uint32 d = *dstp;
1077 /*
1078 * convert source and destination to G0RAB65565
1079 * and blend all components at the same time
1080 */
1081 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1082 + (s >> 3 & 0x1f);
1083 d = (d | d << 16) & 0x07e0f81f;
1084 d += (s - d) * alpha >> 5;
1085 d &= 0x07e0f81f;
1086 *dstp = (Uint16)(d | d >> 16);
1087 }
1088 }
1089 srcp++;
1090 dstp++;
1091 }, width);
1092 /* *INDENT-ON* */
1093 srcp += srcskip;
1094 dstp += dstskip;
1095 }
1096}
1097
1098/* fast ARGB8888->RGB555 blending with pixel alpha */
1099static void
1101{
1102 int width = info->dst_w;
1103 int height = info->dst_h;
1104 Uint32 *srcp = (Uint32 *) info->src;
1105 int srcskip = info->src_skip >> 2;
1106 Uint16 *dstp = (Uint16 *) info->dst;
1107 int dstskip = info->dst_skip >> 1;
1108
1109 while (height--) {
1110 /* *INDENT-OFF* */
1111 DUFFS_LOOP4({
1112 unsigned alpha;
1113 Uint32 s = *srcp;
1114 alpha = s >> 27; /* downscale alpha to 5 bits */
1115 /* FIXME: Here we special-case opaque alpha since the
1116 compositioning used (>>8 instead of /255) doesn't handle
1117 it correctly. Also special-case alpha=0 for speed?
1118 Benchmark this! */
1119 if(alpha) {
1120 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1121 *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1122 } else {
1123 Uint32 d = *dstp;
1124 /*
1125 * convert source and destination to G0RAB65565
1126 * and blend all components at the same time
1127 */
1128 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1129 + (s >> 3 & 0x1f);
1130 d = (d | d << 16) & 0x03e07c1f;
1131 d += (s - d) * alpha >> 5;
1132 d &= 0x03e07c1f;
1133 *dstp = (Uint16)(d | d >> 16);
1134 }
1135 }
1136 srcp++;
1137 dstp++;
1138 }, width);
1139 /* *INDENT-ON* */
1140 srcp += srcskip;
1141 dstp += dstskip;
1142 }
1143}
1144
1145/* General (slow) N->N blending with per-surface alpha */
1146static void
1148{
1149 int width = info->dst_w;
1150 int height = info->dst_h;
1151 Uint8 *src = info->src;
1152 int srcskip = info->src_skip;
1153 Uint8 *dst = info->dst;
1154 int dstskip = info->dst_skip;
1155 SDL_PixelFormat *srcfmt = info->src_fmt;
1156 SDL_PixelFormat *dstfmt = info->dst_fmt;
1157 int srcbpp = srcfmt->BytesPerPixel;
1158 int dstbpp = dstfmt->BytesPerPixel;
1159 Uint32 Pixel;
1160 unsigned sR, sG, sB;
1161 unsigned dR, dG, dB, dA;
1162 const unsigned sA = info->a;
1163
1164 if (sA) {
1165 while (height--) {
1166 /* *INDENT-OFF* */
1168 {
1169 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
1170 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1171 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1172 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1173 src += srcbpp;
1174 dst += dstbpp;
1175 },
1176 width);
1177 /* *INDENT-ON* */
1178 src += srcskip;
1179 dst += dstskip;
1180 }
1181 }
1182}
1183
1184/* General (slow) colorkeyed N->N blending with per-surface alpha */
1185static void
1187{
1188 int width = info->dst_w;
1189 int height = info->dst_h;
1190 Uint8 *src = info->src;
1191 int srcskip = info->src_skip;
1192 Uint8 *dst = info->dst;
1193 int dstskip = info->dst_skip;
1194 SDL_PixelFormat *srcfmt = info->src_fmt;
1195 SDL_PixelFormat *dstfmt = info->dst_fmt;
1196 Uint32 ckey = info->colorkey;
1197 int srcbpp = srcfmt->BytesPerPixel;
1198 int dstbpp = dstfmt->BytesPerPixel;
1199 Uint32 Pixel;
1200 unsigned sR, sG, sB;
1201 unsigned dR, dG, dB, dA;
1202 const unsigned sA = info->a;
1203
1204 while (height--) {
1205 /* *INDENT-OFF* */
1207 {
1208 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
1209 if(sA && Pixel != ckey) {
1210 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
1211 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1212 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1213 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1214 }
1215 src += srcbpp;
1216 dst += dstbpp;
1217 },
1218 width);
1219 /* *INDENT-ON* */
1220 src += srcskip;
1221 dst += dstskip;
1222 }
1223}
1224
1225/* General (slow) N->N blending with pixel alpha */
1226static void
1228{
1229 int width = info->dst_w;
1230 int height = info->dst_h;
1231 Uint8 *src = info->src;
1232 int srcskip = info->src_skip;
1233 Uint8 *dst = info->dst;
1234 int dstskip = info->dst_skip;
1235 SDL_PixelFormat *srcfmt = info->src_fmt;
1236 SDL_PixelFormat *dstfmt = info->dst_fmt;
1237 int srcbpp;
1238 int dstbpp;
1239 Uint32 Pixel;
1240 unsigned sR, sG, sB, sA;
1241 unsigned dR, dG, dB, dA;
1242
1243 /* Set up some basic variables */
1244 srcbpp = srcfmt->BytesPerPixel;
1245 dstbpp = dstfmt->BytesPerPixel;
1246
1247 while (height--) {
1248 /* *INDENT-OFF* */
1250 {
1251 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
1252 if(sA) {
1253 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1254 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1255 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1256 }
1257 src += srcbpp;
1258 dst += dstbpp;
1259 },
1260 width);
1261 /* *INDENT-ON* */
1262 src += srcskip;
1263 dst += dstskip;
1264 }
1265}
1266
1267
1270{
1272 SDL_PixelFormat *df = surface->map->dst->format;
1273
1274 switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
1275 case SDL_COPY_BLEND:
1276 /* Per-pixel alpha blits */
1277 switch (df->BytesPerPixel) {
1278 case 1:
1279 if (df->palette != NULL) {
1280 return BlitNto1PixelAlpha;
1281 } else {
1282 /* RGB332 has no palette ! */
1283 return BlitNtoNPixelAlpha;
1284 }
1285
1286 case 2:
1287 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
1288 && sf->Gmask == 0xff00
1289 && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
1290 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
1291 if (df->Gmask == 0x7e0)
1293 else if (df->Gmask == 0x3e0)
1295 }
1296 return BlitNtoNPixelAlpha;
1297
1298 case 4:
1299 if (sf->Rmask == df->Rmask
1300 && sf->Gmask == df->Gmask
1301 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1302#if defined(__MMX__) || defined(__3dNOW__)
1303 if (sf->Rshift % 8 == 0
1304 && sf->Gshift % 8 == 0
1305 && sf->Bshift % 8 == 0
1306 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
1307#ifdef __3dNOW__
1308 if (SDL_Has3DNow())
1309 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1310#endif
1311#ifdef __MMX__
1312 if (SDL_HasMMX())
1313 return BlitRGBtoRGBPixelAlphaMMX;
1314#endif
1315 }
1316#endif /* __MMX__ || __3dNOW__ */
1317 if (sf->Amask == 0xff000000) {
1319 }
1320 }
1321 return BlitNtoNPixelAlpha;
1322
1323 case 3:
1324 default:
1325 break;
1326 }
1327 return BlitNtoNPixelAlpha;
1328
1330 if (sf->Amask == 0) {
1331 /* Per-surface alpha blits */
1332 switch (df->BytesPerPixel) {
1333 case 1:
1334 if (df->palette != NULL) {
1335 return BlitNto1SurfaceAlpha;
1336 } else {
1337 /* RGB332 has no palette ! */
1338 return BlitNtoNSurfaceAlpha;
1339 }
1340
1341 case 2:
1342 if (surface->map->identity) {
1343 if (df->Gmask == 0x7e0) {
1344#ifdef __MMX__
1345 if (SDL_HasMMX())
1346 return Blit565to565SurfaceAlphaMMX;
1347 else
1348#endif
1350 } else if (df->Gmask == 0x3e0) {
1351#ifdef __MMX__
1352 if (SDL_HasMMX())
1353 return Blit555to555SurfaceAlphaMMX;
1354 else
1355#endif
1357 }
1358 }
1359 return BlitNtoNSurfaceAlpha;
1360
1361 case 4:
1362 if (sf->Rmask == df->Rmask
1363 && sf->Gmask == df->Gmask
1364 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1365#ifdef __MMX__
1366 if (sf->Rshift % 8 == 0
1367 && sf->Gshift % 8 == 0
1368 && sf->Bshift % 8 == 0 && SDL_HasMMX())
1369 return BlitRGBtoRGBSurfaceAlphaMMX;
1370#endif
1371 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
1373 }
1374 }
1375 return BlitNtoNSurfaceAlpha;
1376
1377 case 3:
1378 default:
1379 return BlitNtoNSurfaceAlpha;
1380 }
1381 }
1382 break;
1383
1385 if (sf->Amask == 0) {
1386 if (df->BytesPerPixel == 1) {
1387
1388 if (df->palette != NULL) {
1390 } else {
1391 /* RGB332 has no palette ! */
1393 }
1394 } else {
1396 }
1397 }
1398 break;
1399 }
1400
1401 return NULL;
1402}
1403
1404/* vi: set ts=4 sw=4 expandtab: */
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width)
Definition: SDL_blit.h:504
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
Definition: SDL_blit.h:402
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
Definition: SDL_blit.h:177
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
Definition: SDL_blit.h:454
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
Definition: SDL_blit.h:146
#define SDL_COPY_RLE_MASK
Definition: SDL_blit.h:44
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
Definition: SDL_blit.h:122
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
Definition: SDL_blit.h:353
#define DUFFS_LOOP(pixel_copy_increment, width)
Definition: SDL_blit.h:500
#define SDL_COPY_COLORKEY
Definition: SDL_blit.h:39
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
Definition: SDL_blit.h:445
#define SDL_COPY_MODULATE_ALPHA
Definition: SDL_blit.h:35
void(* SDL_BlitFunc)(SDL_BlitInfo *info)
Definition: SDL_blit.h:73
#define SDL_COPY_BLEND
Definition: SDL_blit.h:36
#define DUFFS_LOOP4(pixel_copy_increment, width)
Definition: SDL_blit.h:488
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:77
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
Definition: SDL_blit_A.c:1269
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1147
#define BLEND16_50(d, s, mask)
Definition: SDL_blit_A.c:588
#define BLEND2x16_50(d, s, mask)
Definition: SDL_blit_A.c:592
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
Definition: SDL_blit_A.c:597
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:30
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:123
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1100
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1186
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:976
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:419
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:394
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1054
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1227
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:461
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1015
unsigned int uintptr_t
#define SDL_HasMMX
#define SDL_Has3DNow
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
GLdouble s
Definition: SDL_opengl.h:2063
GLenum src
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
GLenum GLenum dst
GLfloat GLfloat GLfloat alpha
GLdouble n
GLenum GLint GLuint mask
GLubyte GLubyte GLubyte GLubyte w
#define SDL_ALPHA_OPAQUE
Definition: SDL_pixels.h:46
uint32_t Uint32
Definition: SDL_stdinc.h:203
uint64_t Uint64
Definition: SDL_stdinc.h:216
uint16_t Uint16
Definition: SDL_stdinc.h:191
uint8_t Uint8
Definition: SDL_stdinc.h:179
#define NULL
Definition: begin_code.h:167
EGLSurface surface
Definition: eglext.h:248
SDL_PixelFormat * src_fmt
Definition: SDL_blit.h:65
Uint8 * table
Definition: SDL_blit.h:67
int dst_skip
Definition: SDL_blit.h:64
SDL_PixelFormat * dst_fmt
Definition: SDL_blit.h:66
int src_skip
Definition: SDL_blit.h:60
Uint8 a
Definition: SDL_blit.h:70
Uint8 * src
Definition: SDL_blit.h:57
Uint32 colorkey
Definition: SDL_blit.h:69
Uint8 * dst
Definition: SDL_blit.h:61
Uint8 r
Definition: SDL_pixels.h:297
Uint8 b
Definition: SDL_pixels.h:299
Uint8 g
Definition: SDL_pixels.h:298
SDL_Color * colors
Definition: SDL_pixels.h:307
Uint8 BytesPerPixel
Definition: SDL_pixels.h:320
SDL_Palette * palette
Definition: SDL_pixels.h:318
A collection of pixels used in software blitting.
Definition: SDL_surface.h:71