Contents of /trunk/smpeg/patches/smpeg-0.4.4-pic.patch
Parent Directory | Revision Log
Revision 153 -
(show annotations)
(download)
Tue May 8 20:52:56 2007 UTC (17 years, 4 months ago) by niro
File size: 9472 byte(s)
-import
1 | Fix PIC issues in mmx routines |
2 | |
3 | --- video/mmxflags_asm.S |
4 | +++ video/mmxflags_asm.S |
5 | @@ -1,11 +1,6 @@ |
6 | |
7 | #if defined(i386) && defined(USE_MMX) |
8 | |
9 | -.data |
10 | - .align 16 |
11 | - .type flags,@object |
12 | -flags: .long 0 |
13 | - |
14 | .text |
15 | .align 4 |
16 | .globl cpu_flags |
17 | @@ -40,16 +35,13 @@ cpu_flags: |
18 | xorl %ecx,%eax |
19 | je cpu_flags.L1 |
20 | |
21 | - pusha |
22 | + pushl %ebx |
23 | |
24 | movl $1,%eax |
25 | cpuid |
26 | + movl %edx,%eax |
27 | |
28 | - movl %edx,flags |
29 | - |
30 | - popa |
31 | - |
32 | - movl flags,%eax |
33 | + popl %ebx |
34 | |
35 | cpu_flags.L1: |
36 | ret |
37 | --- video/mmxidct_asm.S |
38 | +++ video/mmxidct_asm.S |
39 | @@ -31,11 +31,6 @@ preSC: .short 16384,22725,21407,19266, |
40 | x0005000200010001: |
41 | .long 0x00010001,0x00050002 |
42 | .align 8 |
43 | - .type x0040000000000000,@object |
44 | - .size x0040000000000000,8 |
45 | -x0040000000000000: |
46 | - .long 0, 0x00400000 |
47 | - .align 8 |
48 | .type x5a825a825a825a82,@object |
49 | .size x5a825a825a825a82,8 |
50 | x5a825a825a825a82: |
51 | @@ -80,8 +75,21 @@ scratch7: |
52 | x0: |
53 | .long 0,0 |
54 | .align 8 |
55 | + |
56 | .text |
57 | .align 4 |
58 | + |
59 | +#ifdef __PIC__ |
60 | # undef __i686 /* gcc can predefine __i686 as a macro; undefine it so the preprocessor leaves the __i686.get_pc_thunk.bx symbol name intact */ |
61 | +# define MUNG(sym) sym ## @GOTOFF(%ebx) |
62 | +# define INIT_PIC() \ |
63 | + call __i686.get_pc_thunk.bx ; \ |
64 | + addl $_GLOBAL_OFFSET_TABLE_, %ebx |
65 | +#else |
66 | +# define MUNG(sym) sym |
67 | +# define INIT_PIC() |
68 | +#endif |
69 | + |
70 | .globl IDCT_mmx |
71 | .type IDCT_mmx,@function |
72 | IDCT_mmx: |
73 | @@ -92,8 +100,9 @@ IDCT_mmx: |
74 | pushl %edx |
75 | pushl %esi |
76 | pushl %edi |
77 | + INIT_PIC() |
78 | movl 8(%ebp),%esi /* source matrix */ |
79 | - leal preSC, %ecx |
80 | + leal MUNG(preSC), %ecx |
81 | /* column 0: even part |
82 | * use V4, V12, V0, V8 to produce V22..V25 |
83 | */ |
84 | @@ -109,7 +118,7 @@ IDCT_mmx: |
85 | movq %mm1, %mm2 /* added 11/1/96 */ |
86 | pmulhw 8*8(%esi),%mm5 /* V8 */ |
87 | psubsw %mm0, %mm1 /* V16 */ |
88 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ |
89 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ |
90 | paddsw %mm0, %mm2 /* V17 */ |
91 | movq %mm2, %mm0 /* duplicate V17 */ |
92 | psraw $1, %mm2 /* t75=t82 */ |
93 | @@ -150,7 +159,7 @@ IDCT_mmx: |
94 | paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
95 | movq %mm7, %mm1 /* duplicate V26 */ |
96 | psraw $1, %mm3 /* t91=t94 */ |
97 | - pmulhw x539f539f539f539f,%mm7 /* V33 */ |
98 | + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ |
99 | psraw $1, %mm1 /* t96 */ |
100 | movq %mm5, %mm0 /* duplicate V2 */ |
101 | psraw $2, %mm4 /* t85=t87 */ |
102 | @@ -158,15 +167,15 @@ IDCT_mmx: |
103 | psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
104 | movq %mm0, %mm2 /* duplicate V28 */ |
105 | psraw $1, %mm5 /* t90=t93 */ |
106 | - pmulhw x4546454645464546,%mm0 /* V35 */ |
107 | + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ |
108 | psraw $1, %mm2 /* t97 */ |
109 | movq %mm5, %mm4 /* duplicate t90=t93 */ |
110 | psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
111 | - pmulhw x61f861f861f861f8,%mm1 /* V36 */ |
112 | + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ |
113 | psllw $1, %mm7 /* t107 */ |
114 | paddsw %mm3, %mm5 /* V31 */ |
115 | psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
116 | - pmulhw x5a825a825a825a82,%mm4 /* V34 */ |
117 | + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ |
118 | nop |
119 | psubsw %mm1, %mm0 /* V38 */ |
120 | psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
121 | @@ -233,7 +242,7 @@ IDCT_mmx: |
122 | psubsw %mm7, %mm1 /* V50 */ |
123 | pmulhw 8*9(%esi), %mm5 /* V9 */ |
124 | paddsw %mm7, %mm2 /* V51 */ |
125 | - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ |
126 | + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ |
127 | movq %mm2, %mm6 /* duplicate V51 */ |
128 | psraw $1, %mm2 /* t138=t144 */ |
129 | movq %mm3, %mm4 /* duplicate V1 */ |
130 | @@ -274,11 +283,11 @@ IDCT_mmx: |
131 | * even more by doing the correction step in a later stage when the number |
132 | * is actually multiplied by 16 |
133 | */ |
134 | - paddw x0005000200010001, %mm4 |
135 | + paddw MUNG(x0005000200010001), %mm4 |
136 | psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
137 | psraw $1, %mm0 /* t154=t156 */ |
138 | movq %mm3, %mm1 /* duplicate V60 */ |
139 | - pmulhw x539f539f539f539f, %mm1 /* V67 */ |
140 | + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ |
141 | movq %mm5, %mm6 /* duplicate V3 */ |
142 | psraw $2, %mm4 /* t148=t150 */ |
143 | paddsw %mm4, %mm5 /* V61 */ |
144 | @@ -287,13 +296,13 @@ IDCT_mmx: |
145 | psllw $1, %mm1 /* t169 */ |
146 | paddsw %mm0, %mm5 /* V65 -> result */ |
147 | psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
148 | - pmulhw x5a825a825a825a82, %mm4 /* V68 */ |
149 | + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ |
150 | psraw $1, %mm3 /* t158 */ |
151 | psubsw %mm6, %mm3 /* V66 */ |
152 | movq %mm5, %mm2 /* duplicate V65 */ |
153 | - pmulhw x61f861f861f861f8, %mm3 /* V70 */ |
154 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ |
155 | psllw $1, %mm6 /* t165 */ |
156 | - pmulhw x4546454645464546, %mm6 /* V69 */ |
157 | + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ |
158 | psraw $1, %mm2 /* t172 */ |
159 | /* moved from next block */ |
160 | movq 8*5(%esi), %mm0 /* V56 */ |
161 | @@ -418,7 +427,7 @@ IDCT_mmx: |
162 | * movq 8*13(%esi), %mm4 tmt13 |
163 | */ |
164 | psubsw %mm4, %mm3 /* V134 */ |
165 | - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ |
166 | + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ |
167 | movq 8*9(%esi), %mm6 /* tmt9 */ |
168 | paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
169 | movq %mm0, %mm4 /* duplicate tmt1 */ |
170 | @@ -447,17 +456,17 @@ IDCT_mmx: |
171 | psubsw %mm7, %mm0 /* V144 */ |
172 | movq %mm0, %mm3 /* duplicate V144 */ |
173 | paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
174 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ |
175 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ |
176 | movq %mm1, %mm7 /* duplicate tmt3 */ |
177 | paddsw %mm5, %mm7 /* V145 */ |
178 | psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
179 | psubsw %mm1, %mm3 /* V150 */ |
180 | movq %mm7, %mm5 /* duplicate V145 */ |
181 | - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ |
182 | + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ |
183 | psubsw %mm2, %mm5 /* V148 */ |
184 | - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ |
185 | + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ |
186 | psllw $2, %mm0 /* t311 */ |
187 | - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ |
188 | + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ |
189 | paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
190 | psllw $1, %mm1 /* t313 */ |
191 | nop /* without the nop - freeze here for one clock */ |
192 | @@ -483,7 +492,7 @@ IDCT_mmx: |
193 | paddsw %mm3, %mm6 /* V164 ; free mm3 */ |
194 | movq %mm4, %mm3 /* duplicate V142 */ |
195 | psubsw %mm5, %mm4 /* V165 ; free mm5 */ |
196 | - movq %mm2, scratch7 /* out7 */ |
197 | + movq %mm2, MUNG(scratch7) /* out7 */ |
198 | psraw $4, %mm6 |
199 | psraw $4, %mm4 |
200 | paddsw %mm5, %mm3 /* V162 */ |
201 | @@ -494,11 +503,11 @@ IDCT_mmx: |
202 | */ |
203 | movq %mm6, 8*9(%esi) /* out9 */ |
204 | paddsw %mm1, %mm0 /* V161 */ |
205 | - movq %mm3, scratch5 /* out5 */ |
206 | + movq %mm3, MUNG(scratch5) /* out5 */ |
207 | psubsw %mm1, %mm5 /* V166 ; free mm1 */ |
208 | movq %mm4, 8*11(%esi) /* out11 */ |
209 | psraw $4, %mm5 |
210 | - movq %mm0, scratch3 /* out3 */ |
211 | + movq %mm0, MUNG(scratch3) /* out3 */ |
212 | movq %mm2, %mm4 /* duplicate V140 */ |
213 | movq %mm5, 8*13(%esi) /* out13 */ |
214 | paddsw %mm7, %mm2 /* V160 */ |
215 | @@ -508,7 +517,7 @@ IDCT_mmx: |
216 | /* moved from the next block */ |
217 | movq 8*3(%esi), %mm7 |
218 | psraw $4, %mm4 |
219 | - movq %mm2, scratch1 /* out1 */ |
220 | + movq %mm2, MUNG(scratch1) /* out1 */ |
221 | /* moved from the next block */ |
222 | movq %mm0, %mm1 |
223 | movq %mm4, 8*15(%esi) /* out15 */ |
224 | @@ -565,15 +574,15 @@ IDCT_mmx: |
225 | paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
226 | movq %mm0, %mm4 /* duplicate V110 */ |
227 | paddsw %mm1, %mm2 /* V111 */ |
228 | - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ |
229 | + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ |
230 | psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
231 | psubsw %mm5, %mm4 /* V116 */ |
232 | movq %mm2, %mm1 /* duplicate V111 */ |
233 | - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ |
234 | + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ |
235 | psubsw %mm3, %mm2 /* V114 */ |
236 | - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ |
237 | + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ |
238 | paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
239 | - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ |
240 | + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ |
241 | psllw $2, %mm0 /* t266 */ |
242 | movq %mm1, (%esi) /* save V115 */ |
243 | psllw $1, %mm5 /* t268 */ |
244 | @@ -591,7 +600,7 @@ IDCT_mmx: |
245 | movq %mm6, %mm3 /* duplicate tmt4 */ |
246 | psubsw %mm0, %mm6 /* V100 */ |
247 | paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
248 | - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ |
249 | + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ |
250 | movq %mm7, %mm5 /* duplicate tmt0 */ |
251 | movq 8*8(%esi), %mm1 /* tmt8 */ |
252 | paddsw %mm1, %mm7 /* V103 */ |
253 | @@ -625,10 +634,10 @@ IDCT_mmx: |
254 | movq 8*2(%esi), %mm3 /* V123 */ |
255 | paddsw %mm4, %mm7 /* out0 */ |
256 | /* moved up from next block */ |
257 | - movq scratch3, %mm0 |
258 | + movq MUNG(scratch3), %mm0 |
259 | psraw $4, %mm7 |
260 | /* moved up from next block */ |
261 | - movq scratch5, %mm6 |
262 | + movq MUNG(scratch5), %mm6 |
263 | psubsw %mm4, %mm1 /* out14 ; free mm4 */ |
264 | paddsw %mm3, %mm5 /* out2 */ |
265 | psraw $4, %mm1 |
266 | @@ -639,7 +648,7 @@ IDCT_mmx: |
267 | movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ |
268 | psraw $4, %mm2 |
269 | /* moved up to the prev block */ |
270 | - movq scratch7, %mm4 |
271 | + movq MUNG(scratch7), %mm4 |
272 | /* moved up to the prev block */ |
273 | psraw $4, %mm0 |
274 | movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ |
275 | @@ -647,13 +656,13 @@ IDCT_mmx: |
276 | psraw $4, %mm6 |
277 | /* move back the data to its correct place |
278 | * moved up to the prev block |
279 | - * movq scratch3, %mm0 |
280 | - * movq scratch5, %mm6 |
281 | - * movq scratch7, %mm4 |
282 | + * movq MUNG(scratch3), %mm0 |
283 | + * movq MUNG(scratch5), %mm6 |
284 | + * movq MUNG(scratch7), %mm4 |
285 | * psraw $4, %mm0 |
286 | * psraw $4, %mm6 |
287 | */ |
288 | - movq scratch1, %mm1 |
289 | + movq MUNG(scratch1), %mm1 |
290 | psraw $4, %mm4 |
291 | movq %mm0, 8*3(%esi) /* out3 */ |
292 | psraw $4, %mm1 |
293 | @@ -671,6 +680,15 @@ IDCT_mmx: |
294 | .Lfe1: |
295 | .size IDCT_mmx,.Lfe1-IDCT_mmx |
296 | |
297 | +#ifdef __PIC__ |
298 | + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits |
299 | +.globl __i686.get_pc_thunk.bx |
300 | + .hidden __i686.get_pc_thunk.bx |
301 | + .type __i686.get_pc_thunk.bx,@function |
302 | + __i686.get_pc_thunk.bx: |
303 | + movl (%esp), %ebx |
304 | + ret |
305 | +#endif |
306 | |
307 | #endif /* i386 && USE_MMX */ |
308 |