Contents of /trunk/libsdl/patches/libsdl-1.2.9-PIC-load-mmx-masks-from-stack.patch
Parent Directory | Revision Log
Revision 144 -
(show annotations)
(download)
Tue May 8 20:06:05 2007 UTC (17 years, 4 months ago) by niro
File size: 3901 byte(s)
-import
1 | --- src/hermes/mmxp2_32.asm |
2 | +++ src/hermes/mmxp2_32.asm |
3 | @@ -27,22 +27,37 @@ GLOBAL _ConvertMMXpII32_16BGR565 |
4 | GLOBAL _ConvertMMXpII32_16RGB555 |
5 | GLOBAL _ConvertMMXpII32_16BGR555 |
6 | |
7 | -SECTION .data |
8 | - |
9 | -ALIGN 8 |
10 | - |
11 | -;; Constants for conversion routines |
12 | - |
13 | -mmx32_rgb888_mask dd 00ffffffh,00ffffffh |
14 | - |
15 | -mmx32_rgb565_b dd 000000f8h, 000000f8h |
16 | -mmx32_rgb565_g dd 0000fc00h, 0000fc00h |
17 | -mmx32_rgb565_r dd 00f80000h, 00f80000h |
18 | - |
19 | -mmx32_rgb555_rb dd 00f800f8h,00f800f8h |
20 | -mmx32_rgb555_g dd 0000f800h,0000f800h |
21 | -mmx32_rgb555_mul dd 20000008h,20000008h |
22 | -mmx32_bgr555_mul dd 00082000h,00082000h |
23 | + |
24 | +;; Macros for conversion routines |
25 | +;; _push_immq_mask imm32: push imm32 twice so [esp] holds a qword with imm32 in both dwords (esp drops by 8) |
26 | +%macro _push_immq_mask 1 |
27 | + push dword %1 ; lands at [esp+4] after the second push: upper dword |
28 | + push dword %1 ; lands at [esp]: lower dword |
29 | +%endmacro |
30 | +;; load_immq mmreg, imm32: mmreg = imm32 replicated in both dwords; the 8 pushed bytes stay on the stack |
31 | +%macro load_immq 2 |
32 | + _push_immq_mask %2 |
33 | + movq %1, [esp] ; read back the qword just pushed; release it later with CLEANUP_IMMQ_LOADS |
34 | +%endmacro |
35 | +;; pand_immq mmreg, imm32: mmreg &= imm32 replicated in both dwords; the 8 pushed bytes stay on the stack |
36 | +%macro pand_immq 2 |
37 | + _push_immq_mask %2 |
38 | + pand %1, [esp] ; mask comes from the qword just pushed; release it later with CLEANUP_IMMQ_LOADS |
39 | +%endmacro |
40 | +;; CLEANUP_IMMQ_LOADS(n): drop the stack space left by n load_immq/pand_immq uses (8 bytes each; imm8 form, so keep 8*n <= 127) |
41 | +%define CLEANUP_IMMQ_LOADS(num) \ |
42 | + add esp, byte 8 * (num) |
43 | +;; Masks are single dwords here; load_immq/pand_immq push each one twice to rebuild the former two-dword constant |
44 | +%define mmx32_rgb888_mask 00ffffffh |
45 | +;; masks loaded into mm5/mm6/mm7 by _ConvertMMXpII32_16RGB565 and _ConvertMMXpII32_16BGR565 |
46 | +%define mmx32_rgb565_b 000000f8h |
47 | +%define mmx32_rgb565_g 0000fc00h |
48 | +%define mmx32_rgb565_r 00f80000h |
49 | +;; pand masks and pmaddwd multipliers for the RGB555 / BGR555 converters |
50 | +%define mmx32_rgb555_rb 00f800f8h |
51 | +%define mmx32_rgb555_g 0000f800h |
52 | +%define mmx32_rgb555_mul 20000008h |
53 | +%define mmx32_bgr555_mul 00082000h |
54 | |
55 | |
56 | |
57 | @@ -53,7 +66,8 @@ SECTION .text |
58 | _ConvertMMXpII32_24RGB888: |
59 | |
60 | ; set up mm6 as the mask, mm7 as zero |
61 | - movq mm6, qword [mmx32_rgb888_mask] |
62 | + load_immq mm6, mmx32_rgb888_mask |
63 | + CLEANUP_IMMQ_LOADS(1) |
64 | pxor mm7, mm7 |
65 | |
66 | mov edx, ecx ; save ecx |
67 | @@ -108,9 +122,10 @@ _ConvertMMXpII32_24RGB888: |
68 | _ConvertMMXpII32_16RGB565: |
69 | |
70 | ; set up masks |
71 | - movq mm5, [mmx32_rgb565_b] |
72 | - movq mm6, [mmx32_rgb565_g] |
73 | - movq mm7, [mmx32_rgb565_r] |
74 | + load_immq mm5, mmx32_rgb565_b |
75 | + load_immq mm6, mmx32_rgb565_g |
76 | + load_immq mm7, mmx32_rgb565_r |
77 | + CLEANUP_IMMQ_LOADS(3) |
78 | |
79 | mov edx, ecx |
80 | shr ecx, 2 |
81 | @@ -176,9 +191,10 @@ _ConvertMMXpII32_16RGB565: |
82 | |
83 | _ConvertMMXpII32_16BGR565: |
84 | |
85 | - movq mm5, [mmx32_rgb565_r] |
86 | - movq mm6, [mmx32_rgb565_g] |
87 | - movq mm7, [mmx32_rgb565_b] |
88 | + load_immq mm5, mmx32_rgb565_r |
89 | + load_immq mm6, mmx32_rgb565_g |
90 | + load_immq mm7, mmx32_rgb565_b |
91 | + CLEANUP_IMMQ_LOADS(3) |
92 | |
93 | mov edx, ecx |
94 | shr ecx, 2 |
95 | @@ -253,7 +269,7 @@ _ConvertMMXpII32_16BGR555: |
96 | ; except it uses a different multiplier for the pmaddwd |
97 | ; instruction. cool huh. |
98 | |
99 | - movq mm7, qword [mmx32_bgr555_mul] |
100 | + load_immq mm7, mmx32_bgr555_mul |
101 | jmp _convert_bgr555_cheat |
102 | |
103 | ; This is the same as the Intel version.. they obviously went to |
104 | @@ -263,9 +279,10 @@ _ConvertMMXpII32_16BGR555: |
105 | ; (I think) a more accurate name.. |
106 | _ConvertMMXpII32_16RGB555: |
107 | |
108 | - movq mm7,qword [mmx32_rgb555_mul] |
109 | + load_immq mm7, mmx32_rgb555_mul |
110 | _convert_bgr555_cheat: |
111 | - movq mm6,qword [mmx32_rgb555_g] |
112 | + load_immq mm6, mmx32_rgb555_g |
113 | + CLEANUP_IMMQ_LOADS(2) |
114 | |
115 | mov edx,ecx ; Save ecx |
116 | |
117 | @@ -280,12 +297,14 @@ _convert_bgr555_cheat: |
118 | movq mm0,[esi] |
119 | movq mm3,mm2 |
120 | |
121 | - pand mm3,qword [mmx32_rgb555_rb] |
122 | + pand_immq mm3, mmx32_rgb555_rb |
123 | movq mm1,mm0 |
124 | |
125 | - pand mm1,qword [mmx32_rgb555_rb] |
126 | + pand_immq mm1, mmx32_rgb555_rb |
127 | pmaddwd mm3,mm7 |
128 | |
129 | + CLEANUP_IMMQ_LOADS(2) |
130 | + |
131 | pmaddwd mm1,mm7 |
132 | pand mm2,mm6 |
133 | |
134 | @@ -302,13 +321,13 @@ _convert_bgr555_cheat: |
135 | movq mm0,mm4 |
136 | psrld mm1,6 |
137 | |
138 | - pand mm0,qword [mmx32_rgb555_rb] |
139 | + pand_immq mm0, mmx32_rgb555_rb |
140 | packssdw mm1,mm3 |
141 | |
142 | movq mm3,mm5 |
143 | pmaddwd mm0,mm7 |
144 | |
145 | - pand mm3,qword [mmx32_rgb555_rb] |
146 | + pand_immq mm3, mmx32_rgb555_rb |
147 | pand mm4,mm6 |
148 | |
149 | movq [edi],mm1 |
150 | @@ -329,12 +348,14 @@ _convert_bgr555_cheat: |
151 | movq mm3,mm2 |
152 | movq mm1,mm0 |
153 | |
154 | - pand mm3,qword [mmx32_rgb555_rb] |
155 | + pand_immq mm3, mmx32_rgb555_rb |
156 | packssdw mm5,mm4 |
157 | |
158 | - pand mm1,qword [mmx32_rgb555_rb] |
159 | + pand_immq mm1, mmx32_rgb555_rb |
160 | pand mm2,mm6 |
161 | |
162 | + CLEANUP_IMMQ_LOADS(4) |
163 | + |
164 | movq [edi+8],mm5 |
165 | pmaddwd mm3,mm7 |
166 |