Annotation of /trunk/libsdl/patches/libsdl-1.2.9-PIC-load-mmx-masks-from-stack.patch
Parent Directory | Revision Log
Revision 144 -
(hide annotations)
(download)
Tue May 8 20:06:05 2007 UTC (17 years, 4 months ago) by niro
File size: 3901 byte(s)
-import
1 | niro | 144 | --- src/hermes/mmxp2_32.asm |
2 | +++ src/hermes/mmxp2_32.asm | ||
3 | @@ -27,22 +27,37 @@ GLOBAL _ConvertMMXpII32_16BGR565 | ||
4 | GLOBAL _ConvertMMXpII32_16RGB555 | ||
5 | GLOBAL _ConvertMMXpII32_16BGR555 | ||
6 | |||
7 | -SECTION .data | ||
8 | - | ||
9 | -ALIGN 8 | ||
10 | - | ||
11 | -;; Constants for conversion routines | ||
12 | - | ||
13 | -mmx32_rgb888_mask dd 00ffffffh,00ffffffh | ||
14 | - | ||
15 | -mmx32_rgb565_b dd 000000f8h, 000000f8h | ||
16 | -mmx32_rgb565_g dd 0000fc00h, 0000fc00h | ||
17 | -mmx32_rgb565_r dd 00f80000h, 00f80000h | ||
18 | - | ||
19 | -mmx32_rgb555_rb dd 00f800f8h,00f800f8h | ||
20 | -mmx32_rgb555_g dd 0000f800h,0000f800h | ||
21 | -mmx32_rgb555_mul dd 20000008h,20000008h | ||
22 | -mmx32_bgr555_mul dd 00082000h,00082000h | ||
23 | + | ||
24 | +;; Macros for conversion routines | ||
25 | +;; _push_immq_mask imm32: push imm32 twice so [esp] holds an 8-byte value with imm32 in both dwords | |
26 | +%macro _push_immq_mask 1 | |
27 | + push dword %1 | |
28 | + push dword %1 | |
29 | +%endmacro | |
30 | +;; load_immq mmreg, imm32: movq mmreg from an {imm32, imm32} pair pushed on the stack; the 8 bytes stay pushed until the caller adjusts esp | |
31 | +%macro load_immq 2 | |
32 | + _push_immq_mask %2 | |
33 | + movq %1, [esp] | |
34 | +%endmacro | |
35 | +;; pand_immq mmreg, imm32: pand mmreg with an {imm32, imm32} pair pushed on the stack; the 8 bytes stay pushed until the caller adjusts esp | |
36 | +%macro pand_immq 2 | |
37 | + _push_immq_mask %2 | |
38 | + pand %1, [esp] | |
39 | +%endmacro | |
40 | +;; CLEANUP_IMMQ_LOADS(num): release num 8-byte slots left by load_immq/pand_immq; num is parenthesised so expression arguments scale correctly, and 8*num must fit the forced byte immediate | |
41 | +%define CLEANUP_IMMQ_LOADS(num) \ | |
42 | + add esp, byte 8 * (num) | |
43 | +;; 32-bit mask/multiplier immediates; load_immq/pand_immq replicate each one into both dwords of the 64-bit MMX operand | |
44 | +%define mmx32_rgb888_mask 00ffffffh | |
45 | + | |
46 | +%define mmx32_rgb565_b 000000f8h | |
47 | +%define mmx32_rgb565_g 0000fc00h | |
48 | +%define mmx32_rgb565_r 00f80000h | |
49 | + | |
50 | +%define mmx32_rgb555_rb 00f800f8h | |
51 | +%define mmx32_rgb555_g 0000f800h | |
52 | +%define mmx32_rgb555_mul 20000008h | |
53 | +%define mmx32_bgr555_mul 00082000h | |
54 | |||
55 | |||
56 | |||
57 | @@ -53,7 +66,8 @@ SECTION .text | ||
58 | _ConvertMMXpII32_24RGB888: | ||
59 | |||
60 | ; set up mm6 as the mask, mm7 as zero | ||
61 | - movq mm6, qword [mmx32_rgb888_mask] | ||
62 | + load_immq mm6, mmx32_rgb888_mask | ||
63 | + CLEANUP_IMMQ_LOADS(1) | ||
64 | pxor mm7, mm7 | ||
65 | |||
66 | mov edx, ecx ; save ecx | ||
67 | @@ -108,9 +122,10 @@ _ConvertMMXpII32_24RGB888: | ||
68 | _ConvertMMXpII32_16RGB565: | ||
69 | |||
70 | ; set up masks | ||
71 | - movq mm5, [mmx32_rgb565_b] | ||
72 | - movq mm6, [mmx32_rgb565_g] | ||
73 | - movq mm7, [mmx32_rgb565_r] | ||
74 | + load_immq mm5, mmx32_rgb565_b | ||
75 | + load_immq mm6, mmx32_rgb565_g | ||
76 | + load_immq mm7, mmx32_rgb565_r | ||
77 | + CLEANUP_IMMQ_LOADS(3) | ||
78 | |||
79 | mov edx, ecx | ||
80 | shr ecx, 2 | ||
81 | @@ -176,9 +191,10 @@ _ConvertMMXpII32_16RGB565: | ||
82 | |||
83 | _ConvertMMXpII32_16BGR565: | ||
84 | |||
85 | - movq mm5, [mmx32_rgb565_r] | ||
86 | - movq mm6, [mmx32_rgb565_g] | ||
87 | - movq mm7, [mmx32_rgb565_b] | ||
88 | + load_immq mm5, mmx32_rgb565_r | ||
89 | + load_immq mm6, mmx32_rgb565_g | ||
90 | + load_immq mm7, mmx32_rgb565_b | ||
91 | + CLEANUP_IMMQ_LOADS(3) | ||
92 | |||
93 | mov edx, ecx | ||
94 | shr ecx, 2 | ||
95 | @@ -253,7 +269,7 @@ _ConvertMMXpII32_16BGR555: | ||
96 | ; except it uses a different multiplier for the pmaddwd | ||
97 | ; instruction. cool huh. | ||
98 | |||
99 | - movq mm7, qword [mmx32_bgr555_mul] | ||
100 | + load_immq mm7, mmx32_bgr555_mul | ||
101 | jmp _convert_bgr555_cheat | ||
102 | |||
103 | ; This is the same as the Intel version.. they obviously went to | ||
104 | @@ -263,9 +279,10 @@ _ConvertMMXpII32_16BGR555: | ||
105 | ; (I think) a more accurate name.. | ||
106 | _ConvertMMXpII32_16RGB555: | ||
107 | |||
108 | - movq mm7,qword [mmx32_rgb555_mul] | ||
109 | + load_immq mm7, mmx32_rgb555_mul | ||
110 | _convert_bgr555_cheat: | ||
111 | - movq mm6,qword [mmx32_rgb555_g] | ||
112 | + load_immq mm6, mmx32_rgb555_g | ||
113 | + CLEANUP_IMMQ_LOADS(2) | ||
114 | |||
115 | mov edx,ecx ; Save ecx | ||
116 | |||
117 | @@ -280,12 +297,14 @@ _convert_bgr555_cheat: | ||
118 | movq mm0,[esi] | ||
119 | movq mm3,mm2 | ||
120 | |||
121 | - pand mm3,qword [mmx32_rgb555_rb] | ||
122 | + pand_immq mm3, mmx32_rgb555_rb | ||
123 | movq mm1,mm0 | ||
124 | |||
125 | - pand mm1,qword [mmx32_rgb555_rb] | ||
126 | + pand_immq mm1, mmx32_rgb555_rb | ||
127 | pmaddwd mm3,mm7 | ||
128 | |||
129 | + CLEANUP_IMMQ_LOADS(2) | ||
130 | + | ||
131 | pmaddwd mm1,mm7 | ||
132 | pand mm2,mm6 | ||
133 | |||
134 | @@ -302,13 +321,13 @@ _convert_bgr555_cheat: | ||
135 | movq mm0,mm4 | ||
136 | psrld mm1,6 | ||
137 | |||
138 | - pand mm0,qword [mmx32_rgb555_rb] | ||
139 | + pand_immq mm0, mmx32_rgb555_rb | ||
140 | packssdw mm1,mm3 | ||
141 | |||
142 | movq mm3,mm5 | ||
143 | pmaddwd mm0,mm7 | ||
144 | |||
145 | - pand mm3,qword [mmx32_rgb555_rb] | ||
146 | + pand_immq mm3, mmx32_rgb555_rb | ||
147 | pand mm4,mm6 | ||
148 | |||
149 | movq [edi],mm1 | ||
150 | @@ -329,12 +348,14 @@ _convert_bgr555_cheat: | ||
151 | movq mm3,mm2 | ||
152 | movq mm1,mm0 | ||
153 | |||
154 | - pand mm3,qword [mmx32_rgb555_rb] | ||
155 | + pand_immq mm3, mmx32_rgb555_rb | ||
156 | packssdw mm5,mm4 | ||
157 | |||
158 | - pand mm1,qword [mmx32_rgb555_rb] | ||
159 | + pand_immq mm1, mmx32_rgb555_rb | ||
160 | pand mm2,mm6 | ||
161 | |||
162 | + CLEANUP_IMMQ_LOADS(4) | ||
163 | + | ||
164 | movq [edi+8],mm5 | ||
165 | pmaddwd mm3,mm7 | ||
166 |