Contents of /trunk/libtheora/patches/libtheora-1.0_beta2-pic.patch
Parent Directory | Revision Log
Revision 441 -
(show annotations)
(download)
Tue Jan 8 14:54:25 2008 UTC (16 years, 8 months ago) by niro
File size: 4578 byte(s)
Tue Jan 8 14:54:25 2008 UTC (16 years, 8 months ago) by niro
File size: 4578 byte(s)
- build fixes for 64-bit architectures
1 | diff -urp libtheora-1.0beta2-orig/lib/enc/x86_32/dct_decode_mmx.c libtheora-1.0beta2/lib/enc/x86_32/dct_decode_mmx.c |
2 | --- libtheora-1.0beta2-orig/lib/enc/x86_32/dct_decode_mmx.c 2007-10-04 20:37:01.000000000 +0200 |
3 | +++ libtheora-1.0beta2/lib/enc/x86_32/dct_decode_mmx.c 2007-12-17 10:32:44.000000000 +0100 |
4 | @@ -57,9 +57,9 @@ static void FilterHoriz__mmx(unsigned ch |
5 | "psubw %%mm3,%%mm1\n" /* mm1 = pix[0]-pix[3] mm1 - mm3 */ \ |
6 | "movq %%mm0,%%mm7\n" /* mm7 = pix[2]*/ \ |
7 | "psubw %%mm5,%%mm0\n" /* mm0 = pix[2]-pix[1] mm0 - mm5*/ \ |
8 | - "PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */ \ |
9 | + "PMULLW %3,%%mm0\n" /* *3 */ \ |
10 | "paddw %%mm0,%%mm1\n" /* mm1 has f[0] ... f[4]*/ \ |
11 | - "paddw "MANGLE(V804)",%%mm1\n"/* add 4 */ /* add 256 after shift */ \ |
12 | + "paddw %4,%%mm1\n"/* add 4 */ /* add 256 after shift */ \ |
13 | "psraw $3,%%mm1\n" /* >>3 */ \ |
14 | " pextrw $0,%%mm1,%%esi\n" /* In MM1 we have 4 f coefs (16bits) */ \ |
15 | " pextrw $1,%%mm1,%%edi\n" /* now perform MM4 = *(_bv+ f) */ \ |
16 | @@ -75,20 +75,19 @@ static void FilterHoriz__mmx(unsigned ch |
17 | " packuswb %%mm0,%%mm5\n" /* mm5 = x x x x newpix1 */ \ |
18 | " packuswb %%mm0,%%mm7\n" /* mm7 = x x x x newpix2 */ \ |
19 | " punpcklbw %%mm7,%%mm5\n" /* 2 1 2 1 2 1 2 1 */ \ |
20 | - " movd %%mm5,%%eax\n" /* eax = newpix21 */ \ |
21 | - " movw %%ax,1(%0)\n" \ |
22 | + " movd %%mm5,%%edi\n" /* edi = newpix21 */ \ |
23 | + " movw %%di,1(%0)\n" \ |
24 | " psrlq $32,%%mm5\n" /* why is so big stall here ? */ \ |
25 | - " shrl $16,%%eax\n" \ |
26 | - " lea 1(%0,%1,2),%%edi\n" \ |
27 | - " movw %%ax,1(%0,%1,1)\n" \ |
28 | - " movd %%mm5,%%eax\n" /* eax = newpix21 high part */ \ |
29 | + " shrl $16,%%edi\n" \ |
30 | + " movw %%di,1(%0,%1,1)\n" \ |
31 | + " movd %%mm5,%%edi\n" /* edi = newpix21 high part */ \ |
32 | " lea (%1,%1,2),%%esi\n" \ |
33 | - " movw %%ax,(%%edi)\n" \ |
34 | - " shrl $16,%%eax\n" \ |
35 | - " movw %%ax,1(%0,%%esi)\n" \ |
36 | + " movw %%di,1(%0,%1,2)\n" \ |
37 | + " shrl $16,%%edi\n" \ |
38 | + " movw %%di,1(%0,%%esi)\n" \ |
39 | : \ |
40 | - : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256) \ |
41 | - : "esi", "edi" , "memory", "eax" \ |
42 | + : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) \ |
43 | + : "esi", "edi" , "memory" \ |
44 | ); |
45 | |
46 | OC_LOOP_H_4x4 |
47 | @@ -126,12 +125,12 @@ static void FilterVert__mmx(unsigned cha |
48 | "psubw %%mm5,%%mm3\n" |
49 | "psubw %%mm4,%%mm2\n" |
50 | /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */ |
51 | - "PMULLW "MANGLE(V3)",%%mm3\n" /* *3 */ |
52 | - "PMULLW "MANGLE(V3)",%%mm2\n" /* *3 */ |
53 | + "PMULLW %3,%%mm3\n" /* *3 */ |
54 | + "PMULLW %3,%%mm2\n" /* *3 */ |
55 | "paddw %%mm7,%%mm3\n" /* highpart */ |
56 | "paddw %%mm6,%%mm2\n" /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]); */ |
57 | - "paddw "MANGLE(V804)",%%mm3\n" /* add 4 */ /* add 256 after shift */ |
58 | - "paddw "MANGLE(V804)",%%mm2\n" /* add 4 */ /* add 256 after shift */ |
59 | + "paddw %4,%%mm3\n" /* add 4 */ /* add 256 after shift */ |
60 | + "paddw %4,%%mm2\n" /* add 4 */ /* add 256 after shift */ |
61 | "psraw $3,%%mm3\n" /* >>3 f coefs high */ |
62 | "psraw $3,%%mm2\n" /* >>3 f coefs low */ |
63 | |
64 | @@ -168,7 +167,7 @@ static void FilterVert__mmx(unsigned cha |
65 | "movq %%mm4,(%0,%1)\n" /* pix[ystride]= */ |
66 | "emms\n" |
67 | : |
68 | - : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256) |
69 | + : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) |
70 | : "esi", "edi" , "memory" |
71 | ); |
72 | } |