diff -urp libtheora-1.0alpha6-old/lib/x86_32/dsp_mmx.c libtheora-1.0alpha6/lib/x86_32/dsp_mmx.c --- libtheora-1.0alpha6-old/lib/x86_32/dsp_mmx.c 2006-05-27 19:09:12.000000000 +0200 +++ libtheora-1.0alpha6/lib/x86_32/dsp_mmx.c 2006-07-29 17:37:35.000000000 +0200 @@ -79,7 +79,7 @@ static void sub8x8_128__mmx (unsigned ch " .balign 16 \n\t" " pxor %%mm7, %%mm7 \n\t" - " movq "M(V128)", %%mm1 \n\t" + " movq %3, %%mm1 \n\t" ".rept 8 \n\t" " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ @@ -99,7 +99,8 @@ static void sub8x8_128__mmx (unsigned ch : "+r" (FiltPtr), "+r" (DctInputPtr) - : "m" (PixelsPerLine) + : "m" (PixelsPerLine), + "m" (V128) : "memory" ); } diff -urp libtheora-1.0alpha6-old/lib/x86_32/fdct_mmx.c libtheora-1.0alpha6/lib/x86_32/fdct_mmx.c --- libtheora-1.0alpha6-old/lib/x86_32/fdct_mmx.c 2006-05-27 19:09:12.000000000 +0200 +++ libtheora-1.0alpha6/lib/x86_32/fdct_mmx.c 2006-07-29 17:52:20.000000000 +0200 @@ -74,7 +74,7 @@ static const __attribute__ ((aligned(8), " movq %%mm2, %%mm0 \n\t" /* make a copy */ \ " paddsw %%mm2, %%mm3 \n\t" /* mm3 = is0734 + is1256 */ \ \ - " pmulhw "M(xC4S4)", %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \ + " pmulhw %6, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \ " paddw %%mm2, %%mm0 \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \ " psrlw $15, %%mm2 \n\t" \ " paddw %%mm2, %%mm0 \n\t" /* Truncate mm0, now it is op[4] */ \ @@ -83,7 +83,7 @@ static const __attribute__ ((aligned(8), " movq %%mm0," #ip4 " \n\t" /* save ip4, now mm0,mm2 are free */ \ \ " movq %%mm3, %%mm0 \n\t" \ - " pmulhw "M(xC4S4)", %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \ + " pmulhw %6, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \ \ " psrlw $15, %%mm2 \n\t" \ " paddw %%mm0, %%mm3 \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) */ \ @@ -92,7 +92,7 @@ static const __attribute__ ((aligned(8), " movq %%mm3," #ip0 " \n\t" \ /* ------------------------------------------------------------------- */ \ " movq " #temp ", %%mm3 \n\t" /* mm3 = irot_input_y */ \ - " pmulhw "M(xC2S6)", %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \ + " pmulhw %4, %%mm3 \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \ \ " movq " #temp ", %%mm2 \n\t" \ " movq %%mm2, %%mm0 \n\t" \ @@ -104,7 +104,7 @@ static const __attribute__ ((aligned(8), " movq %%mm5, %%mm0 \n\t" \ \ " movq %%mm5, %%mm2 \n\t" \ - " pmulhw "M(xC6S2)", %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \ + " pmulhw %8, %%mm0 \n\t" /* mm0 = xC6S2 * irot_input_x */ \ \ " psrlw $15, %%mm2 \n\t" \ " paddw %%mm2, %%mm0 \n\t" /* Truncated */ \ @@ -115,7 +115,7 @@ static const __attribute__ ((aligned(8), " movq %%mm5, %%mm0 \n\t" \ " movq %%mm5, %%mm2 \n\t" \ \ - " pmulhw "M(xC2S6)", %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \ + " pmulhw %4, %%mm5 \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \ " psrlw $15, %%mm2 \n\t" \ \ " movq " #temp ", %%mm3 \n\t" \ @@ -124,7 +124,7 @@ static const __attribute__ ((aligned(8), " paddw %%mm2, %%mm5 \n\t" /* Truncated */ \ " movq %%mm3, %%mm2 \n\t" \ \ - " pmulhw "M(xC6S2)", %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \ + " pmulhw %8, %%mm3 \n\t" /* mm3 = xC6S2 * irot_input_y */ \ " psrlw $15, %%mm2 \n\t" \ \ " paddw %%mm2, %%mm3 \n\t" /* Truncated */ \ @@ -132,7 +132,7 @@ static const __attribute__ ((aligned(8), \ " movq %%mm3," #ip6 " \n\t" \ /* ------------------------------------------------------------------- */ \ - " movq "M(xC4S4)", %%mm0 \n\t" \ + " movq %6, %%mm0 \n\t" \ " movq %%mm1, %%mm2 \n\t" \ " movq %%mm1, %%mm3 \n\t" \ \ @@ -162,13 +162,13 @@ static const __attribute__ ((aligned(8), " paddsw %%mm1, %%mm1 \n\t" \ " paddsw %%mm4, %%mm1 \n\t" /* mm1 = id07 + icommon_product1 */ \ /* ------------------------------------------------------------------- */ \ - " movq "M(xC1S7)", %%mm7 \n\t" \ + " movq %3, %%mm7 \n\t" \ " movq %%mm1, %%mm2 \n\t" \ \ " movq %%mm1, %%mm3 \n\t" \ " pmulhw %%mm7, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \ \ - " movq "M(xC7S1)", %%mm7 \n\t" \ + " movq %9, %%mm7 \n\t" \ " psrlw $15, %%mm2 \n\t" \ \ " paddw %%mm3, %%mm1 \n\t" /* mm1 = xC1S7 * irot_input_x */ \ @@ -180,10 +180,10 @@ static const __attribute__ ((aligned(8), " movq %%mm0, %%mm5 \n\t" \ " movq %%mm0, %%mm2 \n\t" \ \ - " movq "M(xC1S7)", %%mm7 \n\t" \ + " movq %3, %%mm7 \n\t" \ " pmulhw %%mm7, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \ \ - " movq "M(xC7S1)", %%mm7 \n\t" \ + " movq %9, %%mm7 \n\t" \ " psrlw $15, %%mm2 \n\t" \ \ " paddw %%mm5, %%mm0 \n\t" /* mm0 = xC1S7 * irot_input_y */ \ @@ -198,8 +198,8 @@ static const __attribute__ ((aligned(8), " movq %%mm1," #ip1 " \n\t" \ " movq %%mm3," #ip7 " \n\t" \ /* ------------------------------------------------------------------- */ \ - " movq "M(xC3S5)", %%mm0 \n\t" \ - " movq "M(xC5S3)", %%mm1 \n\t" \ + " movq %5, %%mm0 \n\t" \ + " movq %7, %%mm1 \n\t" \ \ " movq %%mm6, %%mm5 \n\t" \ " movq %%mm6, %%mm7 \n\t" \ @@ -320,7 +320,14 @@ static void fdct_short__mmx ( ogg_int16_ : "+r" (InputData), "+r" (OutputData) - : "r" (temp) + : "r" (temp), + "m" (xC1S7), + "m" (xC2S6), + "m" (xC3S5), + "m" (xC4S4), + "m" (xC5S3), + "m" (xC6S2), + "m" (xC7S1) : "memory" ); } diff -urp libtheora-1.0alpha6-old/lib/x86_32/recon_mmx.c libtheora-1.0alpha6/lib/x86_32/recon_mmx.c --- libtheora-1.0alpha6-old/lib/x86_32/recon_mmx.c 2006-05-27 19:09:12.000000000 +0200 +++ libtheora-1.0alpha6/lib/x86_32/recon_mmx.c 2006-07-29 17:39:05.000000000 +0200 @@ -71,7 +71,7 @@ static void recon_intra8x8__mmx (unsigne __asm__ __volatile__ ( " .balign 16 \n\t" - " movq "M(V128)", %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */ + " movq %3, %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */ " lea 128(%1), %%edi \n\t" /* Endpoint in input buffer */ "1: \n\t" @@ -89,7 +89,8 @@ static void recon_intra8x8__mmx (unsigne " jc 1b \n\t" /* Loop back if we are not done */ : "+r" (ReconPtr) : "r" (ChangePtr), - "r" (LineStep) + "r" (LineStep), + "m" (V128) : "memory", "edi" ); }