34 # define RENAME(a) a ## _C
36 # define TEMPLATE_PP_C 0
/*
 * Per-flavour configuration of the template.
 *
 * For each instruction-set flavour X, when TEMPLATE_PP_X is defined (by
 * something outside this excerpt), RENAME(a) is defined to paste a
 * flavour-specific suffix onto a: _altivec, _MMX, _MMX2, _3DNow or _SSE2.
 * The later "#if TEMPLATE_PP_X" tests in this file rely on these names
 * expanding to 0 or 1.
 *
 * The MMXEXT, 3DNOW and SSE2 sections additionally force TEMPLATE_PP_MMX to
 * 1, and the SSE2 section also forces TEMPLATE_PP_MMXEXT to 1, so code guarded
 * by those flags is enabled for the higher flavours as well.
 *
 * NOTE(review): each section also contains a "# define TEMPLATE_PP_X 0" line.
 * It is presumably the #else default for a flavour that is not selected, but
 * the #else/#endif lines are not visible in this excerpt -- confirm against
 * the full file.
 */
39 #ifdef TEMPLATE_PP_ALTIVEC
40 # define RENAME(a) a ## _altivec
42 # define TEMPLATE_PP_ALTIVEC 0
45 #ifdef TEMPLATE_PP_MMX
46 # define RENAME(a) a ## _MMX
48 # define TEMPLATE_PP_MMX 0
/* MMXEXT flavour: also turns the plain-MMX flag on. */
51 #ifdef TEMPLATE_PP_MMXEXT
52 # undef TEMPLATE_PP_MMX
53 # define TEMPLATE_PP_MMX 1
54 # define RENAME(a) a ## _MMX2
56 # define TEMPLATE_PP_MMXEXT 0
/* 3DNow flavour: also turns the plain-MMX flag on. */
59 #ifdef TEMPLATE_PP_3DNOW
60 # undef TEMPLATE_PP_MMX
61 # define TEMPLATE_PP_MMX 1
62 # define RENAME(a) a ## _3DNow
64 # define TEMPLATE_PP_3DNOW 0
/* SSE2 flavour: turns both the MMX and the MMXEXT flags on. */
67 #ifdef TEMPLATE_PP_SSE2
68 # undef TEMPLATE_PP_MMX
69 # define TEMPLATE_PP_MMX 1
70 # undef TEMPLATE_PP_MMXEXT
71 # define TEMPLATE_PP_MMXEXT 1
72 # define RENAME(a) a ## _SSE2
74 # define TEMPLATE_PP_SSE2 0
/*
 * PAVGB(a,b): expands to one line of inline-asm text that averages the packed
 * unsigned bytes of a and b, leaving the result in b (AT&T operand order).
 * MMXEXT builds emit "pavgb", 3DNow builds emit "pavgusb".
 *
 * PAVGB forwards to REAL_PAVGB so that macro arguments are expanded before
 * REAL_PAVGB stringifies them with #.
 *
 * NOTE(review): the #endif closing this #if/#elif is not visible in this
 * excerpt.
 */
82 #if TEMPLATE_PP_MMXEXT
83 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
84 #elif TEMPLATE_PP_3DNOW
85 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
87 #define PAVGB(a,b) REAL_PAVGB(a,b)
/*
 * PMINUB(x, y, t): asm text that leaves the per-byte unsigned minimum of x and
 * y in the SECOND argument y; t is a scratch register.
 *
 * MMXEXT: a single "pminub x, y"; t is unused.
 * Otherwise (note the parameter names are swapped to (b,a,t) in this variant):
 *   t  = a            (copy of the second argument)
 *   t  = sat(t - b)   (unsigned saturating: a-b where a>b, else 0)
 *   a -= t            (a where a<=b, else b)
 * so this variant clobbers t.
 *
 * NOTE(review): the #else/#endif between/after the two definitions are not
 * visible in this excerpt.
 */
89 #if TEMPLATE_PP_MMXEXT
90 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
92 #define PMINUB(b,a,t) \
93 "movq " #a ", " #t " \n\t"\
94 "psubusb " #b ", " #t " \n\t"\
95 "psubb " #t ", " #a " \n\t"
/*
 * PMAXUB(a,b): asm text that leaves the per-byte unsigned maximum of a and b
 * in b.
 *
 * MMXEXT: a single "pmaxub a, b".
 * Plain MMX: b = sat(b - a) (unsigned saturating subtract, 0 where b<=a),
 * then b += a, which yields b where b>a and a otherwise. No scratch register
 * is needed.
 *
 * NOTE(review): the closing #endif is not visible in this excerpt.
 */
98 #if TEMPLATE_PP_MMXEXT
99 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
100 #elif TEMPLATE_PP_MMX
101 #define PMAXUB(a,b) \
102 "psubusb " #a ", " #b " \n\t"\
103 "paddb " #a ", " #b " \n\t"
115 "movq %0, %%mm7 \n\t"
116 "movq %1, %%mm6 \n\t"
117 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
121 "lea (%2, %3), %%"REG_a
" \n\t"
125 "movq (%2), %%mm0 \n\t"
126 "movq (%%"REG_a
"), %%mm1 \n\t"
127 "movq %%mm0, %%mm3 \n\t"
128 "movq %%mm0, %%mm4 \n\t"
130 PMINUB(%%mm1, %%mm3, %%mm5)
131 "psubb %%mm1, %%mm0 \n\t"
132 "paddb %%mm7, %%mm0 \n\t"
133 "pcmpgtb %%mm6, %%mm0 \n\t"
135 "movq (%%"REG_a
",%3), %%mm2 \n\t"
137 PMINUB(%%mm2, %%mm3, %%mm5)
138 "psubb %%mm2, %%mm1 \n\t"
139 "paddb %%mm7, %%mm1 \n\t"
140 "pcmpgtb %%mm6, %%mm1 \n\t"
141 "paddb %%mm1, %%mm0 \n\t"
143 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t"
145 PMINUB(%%mm1, %%mm3, %%mm5)
146 "psubb %%mm1, %%mm2 \n\t"
147 "paddb %%mm7, %%mm2 \n\t"
148 "pcmpgtb %%mm6, %%mm2 \n\t"
149 "paddb %%mm2, %%mm0 \n\t"
151 "lea (%%"REG_a
", %3, 4), %%"REG_a
" \n\t"
153 "movq (%2, %3, 4), %%mm2 \n\t"
155 PMINUB(%%mm2, %%mm3, %%mm5)
156 "psubb %%mm2, %%mm1 \n\t"
157 "paddb %%mm7, %%mm1 \n\t"
158 "pcmpgtb %%mm6, %%mm1 \n\t"
159 "paddb %%mm1, %%mm0 \n\t"
161 "movq (%%"REG_a
"), %%mm1 \n\t"
163 PMINUB(%%mm1, %%mm3, %%mm5)
164 "psubb %%mm1, %%mm2 \n\t"
165 "paddb %%mm7, %%mm2 \n\t"
166 "pcmpgtb %%mm6, %%mm2 \n\t"
167 "paddb %%mm2, %%mm0 \n\t"
169 "movq (%%"REG_a
", %3), %%mm2 \n\t"
171 PMINUB(%%mm2, %%mm3, %%mm5)
172 "psubb %%mm2, %%mm1 \n\t"
173 "paddb %%mm7, %%mm1 \n\t"
174 "pcmpgtb %%mm6, %%mm1 \n\t"
175 "paddb %%mm1, %%mm0 \n\t"
177 "movq (%%"REG_a
", %3, 2), %%mm1 \n\t"
179 PMINUB(%%mm1, %%mm3, %%mm5)
180 "psubb %%mm1, %%mm2 \n\t"
181 "paddb %%mm7, %%mm2 \n\t"
182 "pcmpgtb %%mm6, %%mm2 \n\t"
183 "paddb %%mm2, %%mm0 \n\t"
184 "psubusb %%mm3, %%mm4 \n\t"
187 #if TEMPLATE_PP_MMXEXT
188 "pxor %%mm7, %%mm7 \n\t"
189 "psadbw %%mm7, %%mm0 \n\t"
191 "movq %%mm0, %%mm1 \n\t"
192 "psrlw $8, %%mm0 \n\t"
193 "paddb %%mm1, %%mm0 \n\t"
194 "movq %%mm0, %%mm1 \n\t"
195 "psrlq $16, %%mm0 \n\t"
196 "paddb %%mm1, %%mm0 \n\t"
197 "movq %%mm0, %%mm1 \n\t"
198 "psrlq $32, %%mm0 \n\t"
199 "paddb %%mm1, %%mm0 \n\t"
201 "movq %4, %%mm7 \n\t"
202 "paddusb %%mm7, %%mm7 \n\t"
203 "psubusb %%mm7, %%mm4 \n\t"
204 "packssdw %%mm4, %%mm4 \n\t"
205 "movd %%mm0, %0 \n\t"
206 "movd %%mm4, %1 \n\t"
208 :
"=r" (numEq),
"=r" (dcOk)
213 numEq= (-numEq) &0xFF;
214 if(numEq >
c->ppMode.flatnessThreshold){
221 #endif //TEMPLATE_PP_MMX
227 #if !TEMPLATE_PP_ALTIVEC
230 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
233 "movq %2, %%mm0 \n\t"
234 "pxor %%mm4, %%mm4 \n\t"
236 "movq (%0), %%mm6 \n\t"
237 "movq (%0, %1), %%mm5 \n\t"
238 "movq %%mm5, %%mm1 \n\t"
239 "movq %%mm6, %%mm2 \n\t"
240 "psubusb %%mm6, %%mm5 \n\t"
241 "psubusb %%mm1, %%mm2 \n\t"
242 "por %%mm5, %%mm2 \n\t"
243 "psubusb %%mm0, %%mm2 \n\t"
244 "pcmpeqb %%mm4, %%mm2 \n\t"
246 "pand %%mm2, %%mm6 \n\t"
247 "pandn %%mm1, %%mm2 \n\t"
248 "por %%mm2, %%mm6 \n\t"
250 "movq (%0, %1, 8), %%mm5 \n\t"
251 "lea (%0, %1, 4), %%"REG_a
" \n\t"
252 "lea (%0, %1, 8), %%"REG_c
" \n\t"
253 "sub %1, %%"REG_c
" \n\t"
255 "movq (%0, %1, 8), %%mm7 \n\t"
256 "movq %%mm5, %%mm1 \n\t"
257 "movq %%mm7, %%mm2 \n\t"
258 "psubusb %%mm7, %%mm5 \n\t"
259 "psubusb %%mm1, %%mm2 \n\t"
260 "por %%mm5, %%mm2 \n\t"
261 "psubusb %%mm0, %%mm2 \n\t"
262 "pcmpeqb %%mm4, %%mm2 \n\t"
264 "pand %%mm2, %%mm7 \n\t"
265 "pandn %%mm1, %%mm2 \n\t"
266 "por %%mm2, %%mm7 \n\t"
275 "movq (%0, %1), %%mm0 \n\t"
276 "movq %%mm0, %%mm1 \n\t"
280 "movq (%0, %1, 4), %%mm2 \n\t"
281 "movq %%mm2, %%mm5 \n\t"
282 PAVGB((%%REGa), %%mm2)
283 PAVGB((%0, %1, 2), %%mm2)
284 "movq %%mm2, %%mm3 \n\t"
285 "movq (%0), %%mm4 \n\t"
288 "movq %%mm3, (%0) \n\t"
290 "movq %%mm1, %%mm0 \n\t"
292 "movq %%mm4, %%mm3 \n\t"
293 PAVGB((%0,%1,2), %%mm3)
294 PAVGB((%%REGa,%1,2), %%mm5)
295 PAVGB((%%REGa), %%mm5)
298 "movq %%mm3, (%0,%1) \n\t"
301 "movq (%%"REG_c
"), %%mm0 \n\t"
302 PAVGB((%%REGa, %1, 2), %%mm0)
303 "movq %%mm0, %%mm3 \n\t"
307 "movq (%0, %1, 2), %%mm2 \n\t"
308 "movq %%mm0, (%0, %1, 2) \n\t"
310 "movq (%%"REG_a
", %1, 4), %%mm0 \n\t"
311 PAVGB((%%REGc), %%mm0)
317 "movq (%%"REG_a
"), %%mm5 \n\t"
318 "movq %%mm6, (%%"REG_a
") \n\t"
320 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t"
325 "movq (%0, %1, 4), %%mm4 \n\t"
328 "movq %%mm6, (%0, %1, 4) \n\t"
333 "movq (%%"REG_a
", %1, 2), %%mm6 \n\t"
336 "movq %%mm1, (%%"REG_a
", %1, 2) \n\t"
338 PAVGB((%%REGc), %%mm2)
339 "movq (%%"REG_a
", %1, 4), %%mm0 \n\t"
343 "movq %%mm6, (%%"REG_c
") \n\t"
350 "movq %%mm5, (%%"REG_a
", %1, 4) \n\t"
357 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
359 const int l2=
stride + l1;
360 const int l3=
stride + l2;
361 const int l4=
stride + l3;
362 const int l5=
stride + l4;
363 const int l6=
stride + l5;
364 const int l7=
stride + l6;
365 const int l8=
stride + l7;
366 const int l9=
stride + l8;
374 sums[0] = 4*first +
src[l1] +
src[l2] +
src[l3] + 4;
375 sums[1] = sums[0] - first + src[l4];
376 sums[2] = sums[1] - first + src[l5];
377 sums[3] = sums[2] - first + src[l6];
378 sums[4] = sums[3] - first + src[l7];
379 sums[5] = sums[4] - src[l1] + src[l8];
380 sums[6] = sums[5] - src[l2] + last;
381 sums[7] = sums[6] - src[l3] + last;
382 sums[8] = sums[7] - src[l4] + last;
383 sums[9] = sums[8] - src[l5] + last;
385 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
386 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
387 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
388 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
389 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
390 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
391 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
392 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
396 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
398 #endif //TEMPLATE_PP_ALTIVEC
409 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
413 "pxor %%mm7, %%mm7 \n\t"
414 "lea (%0, %1), %%"REG_a
" \n\t"
415 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t"
418 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
419 "movq (%0, %1, 4), %%mm1 \n\t"
420 "movq %%mm1, %%mm2 \n\t"
421 "psubusb %%mm0, %%mm1 \n\t"
422 "psubusb %%mm2, %%mm0 \n\t"
423 "por %%mm1, %%mm0 \n\t"
424 "movq (%%"REG_c
"), %%mm3 \n\t"
425 "movq (%%"REG_c
", %1), %%mm4 \n\t"
426 "movq %%mm3, %%mm5 \n\t"
427 "psubusb %%mm4, %%mm3 \n\t"
428 "psubusb %%mm5, %%mm4 \n\t"
429 "por %%mm4, %%mm3 \n\t"
431 "movq %%mm2, %%mm1 \n\t"
432 "psubusb %%mm5, %%mm2 \n\t"
433 "movq %%mm2, %%mm4 \n\t"
434 "pcmpeqb %%mm7, %%mm2 \n\t"
435 "psubusb %%mm1, %%mm5 \n\t"
436 "por %%mm5, %%mm4 \n\t"
437 "psubusb %%mm0, %%mm4 \n\t"
438 "movq %%mm4, %%mm3 \n\t"
439 "movq %2, %%mm0 \n\t"
440 "paddusb %%mm0, %%mm0 \n\t"
441 "psubusb %%mm0, %%mm4 \n\t"
442 "pcmpeqb %%mm7, %%mm4 \n\t"
443 "psubusb "MANGLE(b01)
", %%mm3 \n\t"
444 "pand %%mm4, %%mm3 \n\t"
447 "movq %%mm3, %%mm1 \n\t"
451 "movq (%0, %1, 4), %%mm0 \n\t"
452 "pxor %%mm2, %%mm0 \n\t"
453 "psubusb %%mm3, %%mm0 \n\t"
454 "pxor %%mm2, %%mm0 \n\t"
455 "movq %%mm0, (%0, %1, 4) \n\t"
457 "movq (%%"REG_c
"), %%mm0 \n\t"
458 "pxor %%mm2, %%mm0 \n\t"
459 "paddusb %%mm3, %%mm0 \n\t"
460 "pxor %%mm2, %%mm0 \n\t"
461 "movq %%mm0, (%%"REG_c
") \n\t"
465 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
466 "pxor %%mm2, %%mm0 \n\t"
467 "psubusb %%mm1, %%mm0 \n\t"
468 "pxor %%mm2, %%mm0 \n\t"
469 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t"
471 "movq (%%"REG_c
", %1), %%mm0 \n\t"
472 "pxor %%mm2, %%mm0 \n\t"
473 "paddusb %%mm1, %%mm0 \n\t"
474 "pxor %%mm2, %%mm0 \n\t"
475 "movq %%mm0, (%%"REG_c
", %1) \n\t"
479 "movq (%%"REG_a
", %1), %%mm0 \n\t"
480 "pxor %%mm2, %%mm0 \n\t"
481 "psubusb %%mm1, %%mm0 \n\t"
482 "pxor %%mm2, %%mm0 \n\t"
483 "movq %%mm0, (%%"REG_a
", %1) \n\t"
485 "movq (%%"REG_c
", %1, 2), %%mm0 \n\t"
486 "pxor %%mm2, %%mm0 \n\t"
487 "paddusb %%mm1, %%mm0 \n\t"
488 "pxor %%mm2, %%mm0 \n\t"
489 "movq %%mm0, (%%"REG_c
", %1, 2) \n\t"
496 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
499 const int l2=
stride + l1;
500 const int l3=
stride + l2;
501 const int l4=
stride + l3;
502 const int l5=
stride + l4;
503 const int l6=
stride + l5;
504 const int l7=
stride + l6;
512 int b= src[l4] - src[l5];
513 int c= src[l5] - src[l6];
530 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
533 #if !TEMPLATE_PP_ALTIVEC
536 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
554 #if 0 //slightly more accurate and slightly slower
555 "pxor %%mm7, %%mm7 \n\t"
556 "lea (%0, %1), %%"REG_a
" \n\t"
557 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t"
563 "movq (%0, %1, 2), %%mm0 \n\t"
564 "movq (%0), %%mm1 \n\t"
565 "movq %%mm0, %%mm2 \n\t"
570 "movq (%%"REG_a
"), %%mm1 \n\t"
571 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t"
572 "movq %%mm1, %%mm4 \n\t"
577 "movq %%mm0, %%mm4 \n\t"
578 "psubusb %%mm1, %%mm0 \n\t"
579 "psubusb %%mm4, %%mm1 \n\t"
580 "por %%mm0, %%mm1 \n\t"
583 "movq (%0, %1, 4), %%mm0 \n\t"
584 "movq %%mm0, %%mm4 \n\t"
589 "movq (%%"REG_c
"), %%mm2 \n\t"
590 "movq %%mm3, %%mm5 \n\t"
595 "movq %%mm0, %%mm6 \n\t"
596 "psubusb %%mm3, %%mm0 \n\t"
597 "psubusb %%mm6, %%mm3 \n\t"
598 "por %%mm0, %%mm3 \n\t"
599 "pcmpeqb %%mm7, %%mm0 \n\t"
602 "movq (%%"REG_c
", %1), %%mm6 \n\t"
603 "movq %%mm6, %%mm5 \n\t"
608 "movq (%%"REG_c
", %1, 2), %%mm5 \n\t"
609 "movq %%mm2, %%mm4 \n\t"
614 "movq %%mm6, %%mm4 \n\t"
615 "psubusb %%mm2, %%mm6 \n\t"
616 "psubusb %%mm4, %%mm2 \n\t"
617 "por %%mm6, %%mm2 \n\t"
621 PMINUB(%%mm2, %%mm1, %%mm4)
622 "movq %2, %%mm4 \n\t"
623 "paddusb "MANGLE(b01)
", %%mm4 \n\t"
624 "pcmpgtb %%mm3, %%mm4 \n\t"
625 "psubusb %%mm1, %%mm3 \n\t"
626 "pand %%mm4, %%mm3 \n\t"
628 "movq %%mm3, %%mm1 \n\t"
632 "paddusb %%mm1, %%mm3 \n\t"
635 "movq (%%"REG_a
", %1, 2), %%mm6 \n\t"
636 "movq (%0, %1, 4), %%mm5 \n\t"
637 "movq (%0, %1, 4), %%mm4 \n\t"
638 "psubusb %%mm6, %%mm5 \n\t"
639 "psubusb %%mm4, %%mm6 \n\t"
640 "por %%mm6, %%mm5 \n\t"
641 "pcmpeqb %%mm7, %%mm6 \n\t"
642 "pxor %%mm6, %%mm0 \n\t"
643 "pand %%mm0, %%mm3 \n\t"
644 PMINUB(%%mm5, %%mm3, %%mm0)
646 "psubusb "MANGLE(b01)
", %%mm3 \n\t"
649 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
650 "movq (%0, %1, 4), %%mm2 \n\t"
651 "pxor %%mm6, %%mm0 \n\t"
652 "pxor %%mm6, %%mm2 \n\t"
653 "psubb %%mm3, %%mm0 \n\t"
654 "paddb %%mm3, %%mm2 \n\t"
655 "pxor %%mm6, %%mm0 \n\t"
656 "pxor %%mm6, %%mm2 \n\t"
657 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t"
658 "movq %%mm2, (%0, %1, 4) \n\t"
661 "lea (%0, %1), %%"REG_a
" \n\t"
662 "pcmpeqb %%mm6, %%mm6 \n\t"
668 "movq (%%"REG_a
", %1, 2), %%mm1 \n\t"
669 "movq (%0, %1, 4), %%mm0 \n\t"
670 "pxor %%mm6, %%mm1 \n\t"
674 "movq (%%"REG_a
", %1, 4), %%mm2 \n\t"
675 "movq (%%"REG_a
", %1), %%mm3 \n\t"
676 "pxor %%mm6, %%mm2 \n\t"
677 "movq %%mm2, %%mm5 \n\t"
678 "movq "MANGLE(b80)
", %%mm4 \n\t"
679 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t"
686 "movq (%%"REG_a
"), %%mm2 \n\t"
687 "pxor %%mm6, %%mm2 \n\t"
690 "movq "MANGLE(b80)
", %%mm3 \n\t"
696 PAVGB((%%REGc, %1), %%mm5)
697 "movq (%%"REG_c
", %1, 2), %%mm1 \n\t"
698 "pxor %%mm6, %%mm1 \n\t"
699 PAVGB((%0, %1, 4), %%mm1)
700 "movq "MANGLE(b80)
", %%mm2 \n\t"
706 "movq "MANGLE(b00)
", %%mm1 \n\t"
707 "movq "MANGLE(b00)
", %%mm5 \n\t"
708 "psubb %%mm2, %%mm1 \n\t"
709 "psubb %%mm3, %%mm5 \n\t"
712 PMINUB(%%mm2, %%mm3, %%mm1)
716 "movq "MANGLE(b00)
", %%mm7 \n\t"
717 "movq %2, %%mm2 \n\t"
719 "psubb %%mm6, %%mm2 \n\t"
721 "movq %%mm4, %%mm1 \n\t"
722 "pcmpgtb %%mm7, %%mm1 \n\t"
723 "pxor %%mm1, %%mm4 \n\t"
724 "psubb %%mm1, %%mm4 \n\t"
725 "pcmpgtb %%mm4, %%mm2 \n\t"
726 "psubusb %%mm3, %%mm4 \n\t"
729 "movq %%mm4, %%mm3 \n\t"
730 "psubusb "MANGLE(b01)
", %%mm4 \n\t"
733 "paddb %%mm3, %%mm4 \n\t"
734 "pand %%mm2, %%mm4 \n\t"
736 "movq "MANGLE(b80)
", %%mm5 \n\t"
737 "psubb %%mm0, %%mm5 \n\t"
738 "paddsb %%mm6, %%mm5 \n\t"
739 "pcmpgtb %%mm5, %%mm7 \n\t"
740 "pxor %%mm7, %%mm5 \n\t"
742 PMINUB(%%mm5, %%mm4, %%mm3)
743 "pxor %%mm1, %%mm7 \n\t"
745 "pand %%mm7, %%mm4 \n\t"
746 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
747 "movq (%0, %1, 4), %%mm2 \n\t"
748 "pxor %%mm1, %%mm0 \n\t"
749 "pxor %%mm1, %%mm2 \n\t"
750 "paddb %%mm4, %%mm0 \n\t"
751 "psubb %%mm4, %%mm2 \n\t"
752 "pxor %%mm1, %%mm0 \n\t"
753 "pxor %%mm1, %%mm2 \n\t"
754 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t"
755 "movq %%mm2, (%0, %1, 4) \n\t"
818 #elif TEMPLATE_PP_MMX
822 "pxor %%mm7, %%mm7 \n\t"
827 "movq (%0), %%mm0 \n\t"
828 "movq %%mm0, %%mm1 \n\t"
829 "punpcklbw %%mm7, %%mm0 \n\t"
830 "punpckhbw %%mm7, %%mm1 \n\t"
832 "movq (%0, %1), %%mm2 \n\t"
833 "lea (%0, %1, 2), %%"REG_a
" \n\t"
834 "movq %%mm2, %%mm3 \n\t"
835 "punpcklbw %%mm7, %%mm2 \n\t"
836 "punpckhbw %%mm7, %%mm3 \n\t"
838 "movq (%%"REG_a
"), %%mm4 \n\t"
839 "movq %%mm4, %%mm5 \n\t"
840 "punpcklbw %%mm7, %%mm4 \n\t"
841 "punpckhbw %%mm7, %%mm5 \n\t"
843 "paddw %%mm0, %%mm0 \n\t"
844 "paddw %%mm1, %%mm1 \n\t"
845 "psubw %%mm4, %%mm2 \n\t"
846 "psubw %%mm5, %%mm3 \n\t"
847 "psubw %%mm2, %%mm0 \n\t"
848 "psubw %%mm3, %%mm1 \n\t"
850 "psllw $2, %%mm2 \n\t"
851 "psllw $2, %%mm3 \n\t"
852 "psubw %%mm2, %%mm0 \n\t"
853 "psubw %%mm3, %%mm1 \n\t"
855 "movq (%%"REG_a
", %1), %%mm2 \n\t"
856 "movq %%mm2, %%mm3 \n\t"
857 "punpcklbw %%mm7, %%mm2 \n\t"
858 "punpckhbw %%mm7, %%mm3 \n\t"
860 "psubw %%mm2, %%mm0 \n\t"
861 "psubw %%mm3, %%mm1 \n\t"
862 "psubw %%mm2, %%mm0 \n\t"
863 "psubw %%mm3, %%mm1 \n\t"
864 "movq %%mm0, (%3) \n\t"
865 "movq %%mm1, 8(%3) \n\t"
867 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
868 "movq %%mm0, %%mm1 \n\t"
869 "punpcklbw %%mm7, %%mm0 \n\t"
870 "punpckhbw %%mm7, %%mm1 \n\t"
872 "psubw %%mm0, %%mm2 \n\t"
873 "psubw %%mm1, %%mm3 \n\t"
874 "movq %%mm2, 16(%3) \n\t"
875 "movq %%mm3, 24(%3) \n\t"
876 "paddw %%mm4, %%mm4 \n\t"
877 "paddw %%mm5, %%mm5 \n\t"
878 "psubw %%mm2, %%mm4 \n\t"
879 "psubw %%mm3, %%mm5 \n\t"
881 "lea (%%"REG_a
", %1), %0 \n\t"
882 "psllw $2, %%mm2 \n\t"
883 "psllw $2, %%mm3 \n\t"
884 "psubw %%mm2, %%mm4 \n\t"
885 "psubw %%mm3, %%mm5 \n\t"
887 "movq (%0, %1, 2), %%mm2 \n\t"
888 "movq %%mm2, %%mm3 \n\t"
889 "punpcklbw %%mm7, %%mm2 \n\t"
890 "punpckhbw %%mm7, %%mm3 \n\t"
891 "psubw %%mm2, %%mm4 \n\t"
892 "psubw %%mm3, %%mm5 \n\t"
893 "psubw %%mm2, %%mm4 \n\t"
894 "psubw %%mm3, %%mm5 \n\t"
896 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t"
897 "punpcklbw %%mm7, %%mm6 \n\t"
898 "psubw %%mm6, %%mm2 \n\t"
899 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t"
900 "punpckhbw %%mm7, %%mm6 \n\t"
901 "psubw %%mm6, %%mm3 \n\t"
903 "paddw %%mm0, %%mm0 \n\t"
904 "paddw %%mm1, %%mm1 \n\t"
905 "psubw %%mm2, %%mm0 \n\t"
906 "psubw %%mm3, %%mm1 \n\t"
908 "psllw $2, %%mm2 \n\t"
909 "psllw $2, %%mm3 \n\t"
910 "psubw %%mm2, %%mm0 \n\t"
911 "psubw %%mm3, %%mm1 \n\t"
913 "movq (%0, %1, 4), %%mm2 \n\t"
914 "movq %%mm2, %%mm3 \n\t"
915 "punpcklbw %%mm7, %%mm2 \n\t"
916 "punpckhbw %%mm7, %%mm3 \n\t"
918 "paddw %%mm2, %%mm2 \n\t"
919 "paddw %%mm3, %%mm3 \n\t"
920 "psubw %%mm2, %%mm0 \n\t"
921 "psubw %%mm3, %%mm1 \n\t"
923 "movq (%3), %%mm2 \n\t"
924 "movq 8(%3), %%mm3 \n\t"
926 #if TEMPLATE_PP_MMXEXT
927 "movq %%mm7, %%mm6 \n\t"
928 "psubw %%mm0, %%mm6 \n\t"
929 "pmaxsw %%mm6, %%mm0 \n\t"
930 "movq %%mm7, %%mm6 \n\t"
931 "psubw %%mm1, %%mm6 \n\t"
932 "pmaxsw %%mm6, %%mm1 \n\t"
933 "movq %%mm7, %%mm6 \n\t"
934 "psubw %%mm2, %%mm6 \n\t"
935 "pmaxsw %%mm6, %%mm2 \n\t"
936 "movq %%mm7, %%mm6 \n\t"
937 "psubw %%mm3, %%mm6 \n\t"
938 "pmaxsw %%mm6, %%mm3 \n\t"
940 "movq %%mm7, %%mm6 \n\t"
941 "pcmpgtw %%mm0, %%mm6 \n\t"
942 "pxor %%mm6, %%mm0 \n\t"
943 "psubw %%mm6, %%mm0 \n\t"
944 "movq %%mm7, %%mm6 \n\t"
945 "pcmpgtw %%mm1, %%mm6 \n\t"
946 "pxor %%mm6, %%mm1 \n\t"
947 "psubw %%mm6, %%mm1 \n\t"
948 "movq %%mm7, %%mm6 \n\t"
949 "pcmpgtw %%mm2, %%mm6 \n\t"
950 "pxor %%mm6, %%mm2 \n\t"
951 "psubw %%mm6, %%mm2 \n\t"
952 "movq %%mm7, %%mm6 \n\t"
953 "pcmpgtw %%mm3, %%mm6 \n\t"
954 "pxor %%mm6, %%mm3 \n\t"
955 "psubw %%mm6, %%mm3 \n\t"
958 #if TEMPLATE_PP_MMXEXT
959 "pminsw %%mm2, %%mm0 \n\t"
960 "pminsw %%mm3, %%mm1 \n\t"
962 "movq %%mm0, %%mm6 \n\t"
963 "psubusw %%mm2, %%mm6 \n\t"
964 "psubw %%mm6, %%mm0 \n\t"
965 "movq %%mm1, %%mm6 \n\t"
966 "psubusw %%mm3, %%mm6 \n\t"
967 "psubw %%mm6, %%mm1 \n\t"
970 "movd %2, %%mm2 \n\t"
971 "punpcklbw %%mm7, %%mm2 \n\t"
973 "movq %%mm7, %%mm6 \n\t"
974 "pcmpgtw %%mm4, %%mm6 \n\t"
975 "pxor %%mm6, %%mm4 \n\t"
976 "psubw %%mm6, %%mm4 \n\t"
977 "pcmpgtw %%mm5, %%mm7 \n\t"
978 "pxor %%mm7, %%mm5 \n\t"
979 "psubw %%mm7, %%mm5 \n\t"
981 "psllw $3, %%mm2 \n\t"
982 "movq %%mm2, %%mm3 \n\t"
983 "pcmpgtw %%mm4, %%mm2 \n\t"
984 "pcmpgtw %%mm5, %%mm3 \n\t"
985 "pand %%mm2, %%mm4 \n\t"
986 "pand %%mm3, %%mm5 \n\t"
989 "psubusw %%mm0, %%mm4 \n\t"
990 "psubusw %%mm1, %%mm5 \n\t"
993 "movq "MANGLE(w05)
", %%mm2 \n\t"
994 "pmullw %%mm2, %%mm4 \n\t"
995 "pmullw %%mm2, %%mm5 \n\t"
996 "movq "MANGLE(w20)
", %%mm2 \n\t"
997 "paddw %%mm2, %%mm4 \n\t"
998 "paddw %%mm2, %%mm5 \n\t"
999 "psrlw $6, %%mm4 \n\t"
1000 "psrlw $6, %%mm5 \n\t"
1002 "movq 16(%3), %%mm0 \n\t"
1003 "movq 24(%3), %%mm1 \n\t"
1005 "pxor %%mm2, %%mm2 \n\t"
1006 "pxor %%mm3, %%mm3 \n\t"
1008 "pcmpgtw %%mm0, %%mm2 \n\t"
1009 "pcmpgtw %%mm1, %%mm3 \n\t"
1010 "pxor %%mm2, %%mm0 \n\t"
1011 "pxor %%mm3, %%mm1 \n\t"
1012 "psubw %%mm2, %%mm0 \n\t"
1013 "psubw %%mm3, %%mm1 \n\t"
1014 "psrlw $1, %%mm0 \n\t"
1015 "psrlw $1, %%mm1 \n\t"
1017 "pxor %%mm6, %%mm2 \n\t"
1018 "pxor %%mm7, %%mm3 \n\t"
1019 "pand %%mm2, %%mm4 \n\t"
1020 "pand %%mm3, %%mm5 \n\t"
1022 #if TEMPLATE_PP_MMXEXT
1023 "pminsw %%mm0, %%mm4 \n\t"
1024 "pminsw %%mm1, %%mm5 \n\t"
1026 "movq %%mm4, %%mm2 \n\t"
1027 "psubusw %%mm0, %%mm2 \n\t"
1028 "psubw %%mm2, %%mm4 \n\t"
1029 "movq %%mm5, %%mm2 \n\t"
1030 "psubusw %%mm1, %%mm2 \n\t"
1031 "psubw %%mm2, %%mm5 \n\t"
1033 "pxor %%mm6, %%mm4 \n\t"
1034 "pxor %%mm7, %%mm5 \n\t"
1035 "psubw %%mm6, %%mm4 \n\t"
1036 "psubw %%mm7, %%mm5 \n\t"
1037 "packsswb %%mm5, %%mm4 \n\t"
1038 "movq (%0), %%mm0 \n\t"
1039 "paddb %%mm4, %%mm0 \n\t"
1040 "movq %%mm0, (%0) \n\t"
1041 "movq (%0, %1), %%mm0 \n\t"
1042 "psubb %%mm4, %%mm0 \n\t"
1043 "movq %%mm0, (%0, %1) \n\t"
1050 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1052 const int l2=
stride + l1;
1053 const int l3=
stride + l2;
1054 const int l4=
stride + l3;
1055 const int l5=
stride + l4;
1056 const int l6=
stride + l5;
1057 const int l7=
stride + l6;
1058 const int l8=
stride + l7;
1063 const int middleEnergy= 5*(
src[l5] -
src[l4]) + 2*(
src[l3] -
src[l6]);
1064 if(
FFABS(middleEnergy) < 8*
c->QP){
1065 const int q=(
src[l4] -
src[l5])/2;
1066 const int leftEnergy= 5*(
src[l3] -
src[l2]) + 2*(
src[l1] -
src[l4]);
1067 const int rightEnergy= 5*(
src[l7] -
src[l6]) + 2*(
src[l5] -
src[l8]);
1073 d*=
FFSIGN(-middleEnergy);
1088 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1090 #endif //TEMPLATE_PP_ALTIVEC
1092 #if !TEMPLATE_PP_ALTIVEC
1095 #if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
1098 "pxor %%mm6, %%mm6 \n\t"
1099 "pcmpeqb %%mm7, %%mm7 \n\t"
1100 "movq %2, %%mm0 \n\t"
1101 "punpcklbw %%mm6, %%mm0 \n\t"
1102 "psrlw $1, %%mm0 \n\t"
1103 "psubw %%mm7, %%mm0 \n\t"
1104 "packuswb %%mm0, %%mm0 \n\t"
1105 "movq %%mm0, %3 \n\t"
1107 "lea (%0, %1), %%"REG_a
" \n\t"
1108 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
/*
 * FIND_MIN_MAX(addr): asm text that loads 8 bytes from addr into %mm0 and
 * folds them into two running per-byte accumulators:
 *   %mm7 = unsigned minimum seen so far
 *   %mm6 = unsigned maximum seen so far
 *
 * MMXEXT uses pminub/pmaxub directly. The other variant emulates them with
 * saturating subtract + add/sub and additionally clobbers %mm1.
 *
 * FIND_MIN_MAX forwards to REAL_FIND_MIN_MAX so that addr is macro-expanded
 * before it is stringified.
 *
 * NOTE(review): the #else/#endif around the two REAL_FIND_MIN_MAX definitions
 * are not visible in this excerpt.
 */
1113 #undef REAL_FIND_MIN_MAX
1115 #if TEMPLATE_PP_MMXEXT
1116 #define REAL_FIND_MIN_MAX(addr)\
1117 "movq " #addr ", %%mm0 \n\t"\
1118 "pminub %%mm0, %%mm7 \n\t"\
1119 "pmaxub %%mm0, %%mm6 \n\t"
/* Variant without pminub/pmaxub: same result, uses %mm1 as scratch. */
1121 #define REAL_FIND_MIN_MAX(addr)\
1122 "movq " #addr ", %%mm0 \n\t"\
1123 "movq %%mm7, %%mm1 \n\t"\
1124 "psubusb %%mm0, %%mm6 \n\t"\
1125 "paddb %%mm0, %%mm6 \n\t"\
1126 "psubusb %%mm0, %%mm1 \n\t"\
1127 "psubb %%mm1, %%mm7 \n\t"
1129 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
1131 FIND_MIN_MAX((%%REGa))
1132 FIND_MIN_MAX((%%REGa, %1))
1133 FIND_MIN_MAX((%%REGa, %1, 2))
1134 FIND_MIN_MAX((%0, %1, 4))
1135 FIND_MIN_MAX((%%REGd))
1136 FIND_MIN_MAX((%%REGd, %1))
1137 FIND_MIN_MAX((%%REGd, %1, 2))
1138 FIND_MIN_MAX((%0, %1, 8))
1140 "movq %%mm7, %%mm4 \n\t"
1141 "psrlq $8, %%mm7 \n\t"
1142 #if TEMPLATE_PP_MMXEXT
1143 "pminub %%mm4, %%mm7 \n\t"
1144 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
1145 "pminub %%mm4, %%mm7 \n\t"
1146 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
1147 "pminub %%mm4, %%mm7 \n\t"
1149 "movq %%mm7, %%mm1 \n\t"
1150 "psubusb %%mm4, %%mm1 \n\t"
1151 "psubb %%mm1, %%mm7 \n\t"
1152 "movq %%mm7, %%mm4 \n\t"
1153 "psrlq $16, %%mm7 \n\t"
1154 "movq %%mm7, %%mm1 \n\t"
1155 "psubusb %%mm4, %%mm1 \n\t"
1156 "psubb %%mm1, %%mm7 \n\t"
1157 "movq %%mm7, %%mm4 \n\t"
1158 "psrlq $32, %%mm7 \n\t"
1159 "movq %%mm7, %%mm1 \n\t"
1160 "psubusb %%mm4, %%mm1 \n\t"
1161 "psubb %%mm1, %%mm7 \n\t"
1165 "movq %%mm6, %%mm4 \n\t"
1166 "psrlq $8, %%mm6 \n\t"
1167 #if TEMPLATE_PP_MMXEXT
1168 "pmaxub %%mm4, %%mm6 \n\t"
1169 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
1170 "pmaxub %%mm4, %%mm6 \n\t"
1171 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
1172 "pmaxub %%mm4, %%mm6 \n\t"
1174 "psubusb %%mm4, %%mm6 \n\t"
1175 "paddb %%mm4, %%mm6 \n\t"
1176 "movq %%mm6, %%mm4 \n\t"
1177 "psrlq $16, %%mm6 \n\t"
1178 "psubusb %%mm4, %%mm6 \n\t"
1179 "paddb %%mm4, %%mm6 \n\t"
1180 "movq %%mm6, %%mm4 \n\t"
1181 "psrlq $32, %%mm6 \n\t"
1182 "psubusb %%mm4, %%mm6 \n\t"
1183 "paddb %%mm4, %%mm6 \n\t"
1185 "movq %%mm6, %%mm0 \n\t"
1186 "psubb %%mm7, %%mm6 \n\t"
1188 "movd %%mm6, %k4 \n\t"
1189 "cmpb "MANGLE(deringThreshold)
", %b4 \n\t"
1193 "punpcklbw %%mm7, %%mm7 \n\t"
1194 "punpcklbw %%mm7, %%mm7 \n\t"
1195 "punpcklbw %%mm7, %%mm7 \n\t"
1196 "movq %%mm7, (%4) \n\t"
1198 "movq (%0), %%mm0 \n\t"
1199 "movq %%mm0, %%mm1 \n\t"
1200 "movq %%mm0, %%mm2 \n\t"
1201 "psllq $8, %%mm1 \n\t"
1202 "psrlq $8, %%mm2 \n\t"
1203 "movd -4(%0), %%mm3 \n\t"
1204 "movd 8(%0), %%mm4 \n\t"
1205 "psrlq $24, %%mm3 \n\t"
1206 "psllq $56, %%mm4 \n\t"
1207 "por %%mm3, %%mm1 \n\t"
1208 "por %%mm4, %%mm2 \n\t"
1209 "movq %%mm1, %%mm3 \n\t"
1212 "psubusb %%mm7, %%mm0 \n\t"
1213 "psubusb %%mm7, %%mm2 \n\t"
1214 "psubusb %%mm7, %%mm3 \n\t"
1215 "pcmpeqb "MANGLE(b00)
", %%mm0 \n\t"
1216 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t"
1217 "pcmpeqb "MANGLE(b00)
", %%mm3 \n\t"
1218 "paddb %%mm2, %%mm0 \n\t"
1219 "paddb %%mm3, %%mm0 \n\t"
1221 "movq (%%"REG_a
"), %%mm2 \n\t"
1222 "movq %%mm2, %%mm3 \n\t"
1223 "movq %%mm2, %%mm4 \n\t"
1224 "psllq $8, %%mm3 \n\t"
1225 "psrlq $8, %%mm4 \n\t"
1226 "movd -4(%%"REG_a
"), %%mm5 \n\t"
1227 "movd 8(%%"REG_a
"), %%mm6 \n\t"
1228 "psrlq $24, %%mm5 \n\t"
1229 "psllq $56, %%mm6 \n\t"
1230 "por %%mm5, %%mm3 \n\t"
1231 "por %%mm6, %%mm4 \n\t"
1232 "movq %%mm3, %%mm5 \n\t"
1235 "psubusb %%mm7, %%mm2 \n\t"
1236 "psubusb %%mm7, %%mm4 \n\t"
1237 "psubusb %%mm7, %%mm5 \n\t"
1238 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t"
1239 "pcmpeqb "MANGLE(b00)
", %%mm4 \n\t"
1240 "pcmpeqb "MANGLE(b00)
", %%mm5 \n\t"
1241 "paddb %%mm4, %%mm2 \n\t"
1242 "paddb %%mm5, %%mm2 \n\t"
/*
 * REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1): asm text for one
 * row step of the dering filter. dst and src are memory operands; the other
 * arguments are MMX registers.
 *
 * Steps visible in this excerpt:
 *  - load the 8 bytes at src into sx;
 *  - build lx = the row shifted by one byte so each byte holds its left
 *    neighbour (low byte taken from src-1 via the movd at -4(src)), and
 *    t0 = the row shifted the other way so each byte holds its right
 *    neighbour (top byte taken from src+8); t1 is a copy of lx;
 *  - spill lx to 8(%4), then load the 8-byte value at (%4) into lx;
 *  - subtract that value with unsigned saturation from t1, t0 and sx and
 *    compare each result for equality with b00 (presumably an all-zero
 *    constant -- confirm), producing 0x00/0xFF byte masks, which are then
 *    summed into sx;
 *  - load dst into t0 and t1 and form dst - %3 and dst + %3 with unsigned
 *    saturation; PMINUB then limits pplx to at most t1;
 *  - add sx and psx into ppsx, mask with b08, compare with lx to get a
 *    per-byte select mask, and merge: bytes of pplx where the mask is set,
 *    bytes of dst elsewhere; the merged row is stored back to dst;
 *  - reload lx from the 8(%4) spill slot.
 *
 * The line starting with "#paddb" begins with '#' inside the asm text, so it
 * appears to be disabled at the assembler level (NOTE(review): confirm for
 * the assembler in use).
 *
 * NOTE(review): the original line numbers embedded in this excerpt skip
 * 1257-1259, 1271-1272 and 1277, so some instructions of the macro are likely
 * missing here; plx is not referenced by any visible line.
 */
1244 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
1245 "movq " #src ", " #sx " \n\t" \
1246 "movq " #sx ", " #lx " \n\t" \
1247 "movq " #sx ", " #t0 " \n\t" \
1248 "psllq $8, " #lx " \n\t"\
1249 "psrlq $8, " #t0 " \n\t"\
1250 "movd -4" #src ", " #t1 " \n\t"\
1251 "psrlq $24, " #t1 " \n\t"\
1252 "por " #t1 ", " #lx " \n\t" \
1253 "movd 8" #src ", " #t1 " \n\t"\
1254 "psllq $56, " #t1 " \n\t"\
1255 "por " #t1 ", " #t0 " \n\t" \
1256 "movq " #lx ", " #t1 " \n\t" \
1260 "movq " #lx ", 8(%4) \n\t"\
1261 "movq (%4), " #lx " \n\t"\
1262 "psubusb " #lx ", " #t1 " \n\t"\
1263 "psubusb " #lx ", " #t0 " \n\t"\
1264 "psubusb " #lx ", " #sx " \n\t"\
1265 "movq "MANGLE(b00)", " #lx " \n\t"\
1266 "pcmpeqb " #lx ", " #t1 " \n\t" \
1267 "pcmpeqb " #lx ", " #t0 " \n\t" \
1268 "pcmpeqb " #lx ", " #sx " \n\t" \
1269 "paddb " #t1 ", " #t0 " \n\t"\
1270 "paddb " #t0 ", " #sx " \n\t"\
1273 "movq " #dst ", " #t0 " \n\t" \
1274 "movq " #t0 ", " #t1 " \n\t" \
1275 "psubusb %3, " #t0 " \n\t"\
1276 "paddusb %3, " #t1 " \n\t"\
1278 PMINUB(t1, pplx, t0)\
1279 "paddb " #sx ", " #ppsx " \n\t"\
1280 "paddb " #psx ", " #ppsx " \n\t"\
1281 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
1282 "pand "MANGLE(b08)", " #ppsx " \n\t"\
1283 "pcmpeqb " #lx ", " #ppsx " \n\t"\
1284 "pand " #ppsx ", " #pplx " \n\t"\
1285 "pandn " #dst ", " #ppsx " \n\t"\
1286 "por " #pplx ", " #ppsx " \n\t"\
1287 "movq " #ppsx ", " #dst " \n\t"\
1288 "movq 8(%4), " #lx " \n\t"
/*
 * DERING_CORE forwards to REAL_DERING_CORE so that its arguments are
 * macro-expanded before they are stringified with #.
 */
1290 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
1291 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
1308 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1309 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1310 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1311 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1312 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1313 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1314 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1315 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1320 :
"%"REG_a,
"%"REG_d,
"%"REG_SP
1322 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
1329 const int QP2=
c->QP/2 + 1;
1337 if(*p > max) max= *p;
1338 if(*p < min) min= *p;
1341 avg= (min + max + 1)>>1;
1343 if(max - min <deringThreshold)
return;
1345 for(y=0; y<10; y++){
1360 t &= (t<<1) & (t>>1);
1365 int t = s[y-1] & s[
y] & s[y+1];
1379 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
1383 #ifdef DEBUG_DERING_THRESHOLD
1384 __asm__
volatile(
"emms\n\t":);
1386 static long long numPixels=0;
1387 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
1392 static int numSkipped=0;
1393 static int errorSum=0;
1394 static int worstQP=0;
1395 static int worstRange=0;
1396 static int worstDiff=0;
1398 int absDiff=
FFABS(diff);
1399 int error= diff*
diff;
1401 if(x==1 || x==8 || y==1 || y==8)
continue;
1404 if(absDiff > worstDiff){
1407 worstRange= max-
min;
1411 if(1024LL*1024LL*1024LL % numSkipped == 0){
1413 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
1414 (
float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
1415 worstDiff, (
float)numSkipped/numPixels);
1420 if (*p + QP2 < f) *p= *p + QP2;
1421 else if(*p - QP2 > f) *p= *p - QP2;
1426 #ifdef DEBUG_DERING_THRESHOLD
1434 *p =
FFMIN(*p + 20, 255);
1440 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1442 #endif //TEMPLATE_PP_ALTIVEC
1452 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1455 "lea (%0, %1), %%"REG_a
" \n\t"
1456 "lea (%%"REG_a
", %1, 4), %%"REG_c
" \n\t"
1460 "movq (%0), %%mm0 \n\t"
1461 "movq (%%"REG_a
", %1), %%mm1 \n\t"
1463 "movq %%mm0, (%%"REG_a
") \n\t"
1464 "movq (%0, %1, 4), %%mm0 \n\t"
1466 "movq %%mm1, (%%"REG_a
", %1, 2) \n\t"
1467 "movq (%%"REG_c
", %1), %%mm1 \n\t"
1469 "movq %%mm0, (%%"REG_c
") \n\t"
1470 "movq (%0, %1, 8), %%mm0 \n\t"
1472 "movq %%mm1, (%%"REG_c
", %1, 2) \n\t"
1475 :
"%"REG_a,
"%"REG_c
1484 *(uint32_t*)&
src[
stride*1]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1486 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1488 *(uint32_t*)&
src[
stride*5]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1490 *(uint32_t*)&
src[
stride*7]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1505 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1508 "lea (%0, %1), %%"REG_a
" \n\t"
1509 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1510 "lea (%%"REG_d
", %1, 4), %%"REG_c
" \n\t"
1511 "add %1, %%"REG_c
" \n\t"
/*
 * DEINT_CUBIC(a,b,c,d,e): asm text that writes one 8-byte row to c computed
 * from rows a, b, d and e:
 *
 *   m = avg(b, d)            (packed unsigned byte average)
 *   o = avg(a, e)
 *   c = clip_u8( m - ((o - m) >> 3) )     (16-bit lanes, arithmetic shift)
 *
 * The register used as the zero source for byte->word unpacking (%xmm7 or
 * %mm7) is cleared once by the pxor that precedes each macro definition.
 *
 * SSE2 variant: works on the low 8 bytes of %xmm0-%xmm3 in one pass and
 * stores with movlps.
 * Non-SSE2 variant: uses PAVGB and processes the low and high four bytes
 * separately in %mm0-%mm3 before repacking.
 *
 * DEINT_CUBIC forwards to REAL_DEINT_CUBIC so that address arguments are
 * macro-expanded before they are stringified.
 */
1512 #if TEMPLATE_PP_SSE2
1513 "pxor %%xmm7, %%xmm7 \n\t"
1514 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
1515 "movq " #a ", %%xmm0 \n\t"\
1516 "movq " #b ", %%xmm1 \n\t"\
1517 "movq " #d ", %%xmm2 \n\t"\
1518 "movq " #e ", %%xmm3 \n\t"\
1519 "pavgb %%xmm2, %%xmm1 \n\t"\
1520 "pavgb %%xmm3, %%xmm0 \n\t"\
1521 "punpcklbw %%xmm7, %%xmm0 \n\t"\
1522 "punpcklbw %%xmm7, %%xmm1 \n\t"\
1523 "psubw %%xmm1, %%xmm0 \n\t"\
1524 "psraw $3, %%xmm0 \n\t"\
1525 "psubw %%xmm0, %%xmm1 \n\t"\
1526 "packuswb %%xmm1, %%xmm1 \n\t"\
1527 "movlps %%xmm1, " #c " \n\t"
1528 #else //TEMPLATE_PP_SSE2
1529 "pxor %%mm7, %%mm7 \n\t"
/* MMX-register variant: %mm0/%mm1 carry the low halves, %mm2/%mm3 the high. */
1533 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
1534 "movq " #a ", %%mm0 \n\t"\
1535 "movq " #b ", %%mm1 \n\t"\
1536 "movq " #d ", %%mm2 \n\t"\
1537 "movq " #e ", %%mm3 \n\t"\
1538 PAVGB(%%mm2, %%mm1) \
1539 PAVGB(%%mm3, %%mm0) \
1540 "movq %%mm0, %%mm2 \n\t"\
1541 "punpcklbw %%mm7, %%mm0 \n\t"\
1542 "punpckhbw %%mm7, %%mm2 \n\t"\
1543 "movq %%mm1, %%mm3 \n\t"\
1544 "punpcklbw %%mm7, %%mm1 \n\t"\
1545 "punpckhbw %%mm7, %%mm3 \n\t"\
1546 "psubw %%mm1, %%mm0 \n\t" \
1547 "psubw %%mm3, %%mm2 \n\t" \
1548 "psraw $3, %%mm0 \n\t" \
1549 "psraw $3, %%mm2 \n\t" \
1550 "psubw %%mm0, %%mm1 \n\t" \
1551 "psubw %%mm2, %%mm3 \n\t" \
1552 "packuswb %%mm3, %%mm1 \n\t"\
1553 "movq %%mm1, " #c " \n\t"
1554 #endif //TEMPLATE_PP_SSE2
1555 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
1557 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
1558 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
1559 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
1560 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
1565 XMM_CLOBBERS(
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm7",)
1567 "%"REG_a,
"%"REG_d,
"%"REG_c
1569 #undef REAL_DEINT_CUBIC
1570 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1575 src[stride*5] = av_clip_uint8((-
src[stride*2] + 9*
src[stride*4] + 9*
src[stride*6] -
src[stride*8])>>4);
1576 src[stride*7] = av_clip_uint8((-
src[stride*4] + 9*
src[stride*6] + 9*
src[stride*8] -
src[stride*10])>>4);
1577 src[stride*9] = av_clip_uint8((-
src[stride*6] + 9*
src[stride*8] + 9*
src[stride*10] -
src[stride*12])>>4);
1580 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1592 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1595 "lea (%0, %1), %%"REG_a
" \n\t"
1596 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1597 "pxor %%mm7, %%mm7 \n\t"
1598 "movq (%2), %%mm0 \n\t"
/*
 * DEINT_FF(a,b,c,d): asm text that filters row b in place using rows a, c, d
 * and a row carried in %mm0 from the previous step:
 *
 *   p = avg(a, c)
 *   q = avg(%mm0 on entry, d)
 *   b = clip_u8( (4*p - q + b) >> 2 )     (16-bit lanes, arithmetic shift)
 *
 * On exit %mm0 holds the ORIGINAL (unfiltered) bytes of b, so consecutive
 * invocations chain through %mm0. %mm7 must be zero (it is the unpack
 * source); %mm1-%mm5 are clobbered.
 *
 * DEINT_FF forwards to REAL_DEINT_FF so that address arguments are
 * macro-expanded before they are stringified.
 *
 * NOTE(review): the last line of REAL_DEINT_FF ends in a line continuation;
 * the line that terminated the macro in the full file is not visible in this
 * excerpt.
 */
1602 #define REAL_DEINT_FF(a,b,c,d)\
1603 "movq " #a ", %%mm1 \n\t"\
1604 "movq " #b ", %%mm2 \n\t"\
1605 "movq " #c ", %%mm3 \n\t"\
1606 "movq " #d ", %%mm4 \n\t"\
1607 PAVGB(%%mm3, %%mm1) \
1608 PAVGB(%%mm4, %%mm0) \
1609 "movq %%mm0, %%mm3 \n\t"\
1610 "punpcklbw %%mm7, %%mm0 \n\t"\
1611 "punpckhbw %%mm7, %%mm3 \n\t"\
1612 "movq %%mm1, %%mm4 \n\t"\
1613 "punpcklbw %%mm7, %%mm1 \n\t"\
1614 "punpckhbw %%mm7, %%mm4 \n\t"\
1615 "psllw $2, %%mm1 \n\t"\
1616 "psllw $2, %%mm4 \n\t"\
1617 "psubw %%mm0, %%mm1 \n\t"\
1618 "psubw %%mm3, %%mm4 \n\t"\
1619 "movq %%mm2, %%mm5 \n\t"\
1620 "movq %%mm2, %%mm0 \n\t"\
1621 "punpcklbw %%mm7, %%mm2 \n\t"\
1622 "punpckhbw %%mm7, %%mm5 \n\t"\
1623 "paddw %%mm2, %%mm1 \n\t"\
1624 "paddw %%mm5, %%mm4 \n\t"\
1625 "psraw $2, %%mm1 \n\t"\
1626 "psraw $2, %%mm4 \n\t"\
1627 "packuswb %%mm4, %%mm1 \n\t"\
1628 "movq %%mm1, " #b " \n\t"\
1630 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
1632 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
1633 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
1634 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
1635 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1637 "movq %%mm0, (%2) \n\t"
1639 :
"%"REG_a,
"%"REG_d
1641 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1659 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1671 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
1674 "lea (%0, %1), %%"REG_a
" \n\t"
1675 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1676 "pxor %%mm7, %%mm7 \n\t"
1677 "movq (%2), %%mm0 \n\t"
1678 "movq (%3), %%mm1 \n\t"
/*
 * REAL_DEINT_L5(t1, t2, a, b, c): one step of the DEINT_L5 row filter. Reads
 * the 8-byte rows a, b, c and rewrites row a in place.
 *
 * What the visible instructions do (per byte, 16-bit intermediates):
 *   t1 = a                       (original row a is saved before modification)
 *   a  = packuswb((3*a + 2*b - c) >> 2)
 * 3*a is built as (a+a)+a and 2*b as b+b; psraw is an arithmetic shift and
 * packuswb saturates the result to the unsigned byte range.
 *
 * NOTE(review): the embedded line numbers jump from 1685 to 1688 and t2 is
 * never referenced in the visible body, so two original lines are missing from
 * this listing. They presumably combine t1/t2 with b and c before the
 * arithmetic above - confirm against the full file; the formula given here
 * covers only the instructions that are shown.
 *
 * Preconditions: %%mm7 must be zero (zero source for punpck{l,h}bw; the
 * surrounding asm issues "pxor %%mm7, %%mm7" first).
 * Clobbers: %%mm2-%%mm6 and the register passed as t1. The call sites
 * alternate %%mm0/%%mm1 as t1/t2.
 *
 * DEINT_L5 only forwards to REAL_DEINT_L5 so that its arguments are
 * macro-expanded before being stringified with #.
 *
 * NOTE(review): the last visible body line ends in a continuation backslash
 * and original line 1714 is absent from this listing - confirm the macro is
 * terminated there in the full file.
 */
1682 #define REAL_DEINT_L5(t1,t2,a,b,c)\
1683 "movq " #a ", %%mm2 \n\t"\
1684 "movq " #b ", %%mm3 \n\t"\
1685 "movq " #c ", %%mm4 \n\t"\
1688 "movq %%mm2, %%mm5 \n\t"\
1689 "movq %%mm2, " #t1 " \n\t"\
1690 "punpcklbw %%mm7, %%mm2 \n\t"\
1691 "punpckhbw %%mm7, %%mm5 \n\t"\
1692 "movq %%mm2, %%mm6 \n\t"\
1693 "paddw %%mm2, %%mm2 \n\t"\
1694 "paddw %%mm6, %%mm2 \n\t"\
1695 "movq %%mm5, %%mm6 \n\t"\
1696 "paddw %%mm5, %%mm5 \n\t"\
1697 "paddw %%mm6, %%mm5 \n\t"\
1698 "movq %%mm3, %%mm6 \n\t"\
1699 "punpcklbw %%mm7, %%mm3 \n\t"\
1700 "punpckhbw %%mm7, %%mm6 \n\t"\
1701 "paddw %%mm3, %%mm3 \n\t"\
1702 "paddw %%mm6, %%mm6 \n\t"\
1703 "paddw %%mm3, %%mm2 \n\t"\
1704 "paddw %%mm6, %%mm5 \n\t"\
1705 "movq %%mm4, %%mm6 \n\t"\
1706 "punpcklbw %%mm7, %%mm4 \n\t"\
1707 "punpckhbw %%mm7, %%mm6 \n\t"\
1708 "psubw %%mm4, %%mm2 \n\t"\
1709 "psubw %%mm6, %%mm5 \n\t"\
1710 "psraw $2, %%mm2 \n\t"\
1711 "psraw $2, %%mm5 \n\t"\
1712 "packuswb %%mm5, %%mm2 \n\t"\
1713 "movq %%mm2, " #a " \n\t"\
1715 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
1717 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
1718 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
1719 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
1720 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
1721 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
1722 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
1723 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
1724 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
1726 "movq %%mm0, (%2) \n\t"
1727 "movq %%mm1, (%3) \n\t"
1729 :
"%"REG_a,
"%"REG_d
1731 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
1741 src[stride*1]= av_clip_uint8((-(t2 +
src[stride*3]) + 2*(t3 +
src[stride*2]) + 6*t1 + 4)>>3);
1743 src[stride*2]= av_clip_uint8((-(t3 +
src[stride*4]) + 2*(t1 +
src[stride*3]) + 6*t2 + 4)>>3);
1745 src[stride*3]= av_clip_uint8((-(t1 +
src[stride*5]) + 2*(t2 +
src[stride*4]) + 6*t3 + 4)>>3);
1747 src[stride*4]= av_clip_uint8((-(t2 +
src[stride*6]) + 2*(t3 +
src[stride*5]) + 6*t1 + 4)>>3);
1749 src[stride*5]= av_clip_uint8((-(t3 +
src[stride*7]) + 2*(t1 +
src[stride*6]) + 6*t2 + 4)>>3);
1751 src[stride*6]= av_clip_uint8((-(t1 +
src[stride*8]) + 2*(t2 +
src[stride*7]) + 6*t3 + 4)>>3);
1753 src[stride*7]= av_clip_uint8((-(t2 +
src[stride*9]) + 2*(t3 +
src[stride*8]) + 6*t1 + 4)>>3);
1760 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
1772 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1775 "lea (%0, %1), %%"REG_a
" \n\t"
1776 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1780 "movq (%2), %%mm0 \n\t"
1781 "movq (%%"REG_a
"), %%mm1 \n\t"
1783 "movq (%0), %%mm2 \n\t"
1785 "movq %%mm0, (%0) \n\t"
1786 "movq (%%"REG_a
", %1), %%mm0 \n\t"
1789 "movq %%mm2, (%%"REG_a
") \n\t"
1790 "movq (%%"REG_a
", %1, 2), %%mm2 \n\t"
1793 "movq %%mm1, (%%"REG_a
", %1) \n\t"
1794 "movq (%0, %1, 4), %%mm1 \n\t"
1797 "movq %%mm0, (%%"REG_a
", %1, 2) \n\t"
1798 "movq (%%"REG_d
"), %%mm0 \n\t"
1801 "movq %%mm2, (%0, %1, 4) \n\t"
1802 "movq (%%"REG_d
", %1), %%mm2 \n\t"
1805 "movq %%mm1, (%%"REG_d
") \n\t"
1806 "movq (%%"REG_d
", %1, 2), %%mm1 \n\t"
1809 "movq %%mm0, (%%"REG_d
", %1) \n\t"
1810 "movq (%0, %1, 8), %%mm0 \n\t"
1813 "movq %%mm2, (%%"REG_d
", %1, 2) \n\t"
1814 "movq %%mm1, (%2) \n\t"
1817 :
"%"REG_a,
"%"REG_d
1819 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1824 a= *(uint32_t*)&tmp[
stride*0];
1827 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1828 *(uint32_t*)&
src[
stride*0]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1831 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1832 *(uint32_t*)&
src[
stride*1]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1835 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1836 *(uint32_t*)&
src[
stride*2]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1839 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1840 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1843 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1844 *(uint32_t*)&
src[
stride*4]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1847 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1848 *(uint32_t*)&
src[
stride*5]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1851 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1852 *(uint32_t*)&
src[
stride*6]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1855 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1856 *(uint32_t*)&
src[
stride*7]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1858 *(uint32_t*)&tmp[
stride*0]= c;
1862 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
1875 #if TEMPLATE_PP_MMXEXT
1877 "lea (%0, %1), %%"REG_a
" \n\t"
1878 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1882 "movq (%0), %%mm0 \n\t"
1883 "movq (%%"REG_a
", %1), %%mm2 \n\t"
1884 "movq (%%"REG_a
"), %%mm1 \n\t"
1885 "movq %%mm0, %%mm3 \n\t"
1886 "pmaxub %%mm1, %%mm0 \n\t"
1887 "pminub %%mm3, %%mm1 \n\t"
1888 "pmaxub %%mm2, %%mm1 \n\t"
1889 "pminub %%mm1, %%mm0 \n\t"
1890 "movq %%mm0, (%%"REG_a
") \n\t"
1892 "movq (%0, %1, 4), %%mm0 \n\t"
1893 "movq (%%"REG_a
", %1, 2), %%mm1 \n\t"
1894 "movq %%mm2, %%mm3 \n\t"
1895 "pmaxub %%mm1, %%mm2 \n\t"
1896 "pminub %%mm3, %%mm1 \n\t"
1897 "pmaxub %%mm0, %%mm1 \n\t"
1898 "pminub %%mm1, %%mm2 \n\t"
1899 "movq %%mm2, (%%"REG_a
", %1, 2) \n\t"
1901 "movq (%%"REG_d
"), %%mm2 \n\t"
1902 "movq (%%"REG_d
", %1), %%mm1 \n\t"
1903 "movq %%mm2, %%mm3 \n\t"
1904 "pmaxub %%mm0, %%mm2 \n\t"
1905 "pminub %%mm3, %%mm0 \n\t"
1906 "pmaxub %%mm1, %%mm0 \n\t"
1907 "pminub %%mm0, %%mm2 \n\t"
1908 "movq %%mm2, (%%"REG_d
") \n\t"
1910 "movq (%%"REG_d
", %1, 2), %%mm2 \n\t"
1911 "movq (%0, %1, 8), %%mm0 \n\t"
1912 "movq %%mm2, %%mm3 \n\t"
1913 "pmaxub %%mm0, %%mm2 \n\t"
1914 "pminub %%mm3, %%mm0 \n\t"
1915 "pmaxub %%mm1, %%mm0 \n\t"
1916 "pminub %%mm0, %%mm2 \n\t"
1917 "movq %%mm2, (%%"REG_d
", %1, 2) \n\t"
1921 :
"%"REG_a,
"%"REG_d
1924 #else // MMX without MMX2
1926 "lea (%0, %1), %%"REG_a
" \n\t"
1927 "lea (%%"REG_a
", %1, 4), %%"REG_d
" \n\t"
1930 "pxor %%mm7, %%mm7 \n\t"
/*
 * REAL_MEDIAN(a, b, c): per-byte median of the three 8-byte rows a, b, c,
 * written back to row b. This is the branch used when pminub/pmaxub are not
 * available, so it is built from saturating subtracts and bit masks only.
 *
 * Step by step (all comparisons are unsigned, per byte):
 *   psubusb + pcmpeqb against zero turn each pair into a 0x00/0xFF mask:
 *     p = (a <= c),  q = (c <= b),  r = (b <= a)
 *   the three pxor form  p^q,  q^r,  r^p  (p is kept in %%mm6 for the last one)
 *   result = (c | (p^q)) & (b | (q^r)) & (a | (r^p))
 * For every byte the two non-median inputs are forced to 0xFF by their mask,
 * so the final AND leaves exactly the median value (ties included).
 *
 * Preconditions: %%mm7 must be zero (comparison operand for pcmpeqb; the
 * surrounding asm issues "pxor %%mm7, %%mm7" first).
 * Clobbers: %%mm0-%%mm6. Rows a and c are only read.
 *
 * MEDIAN only forwards to REAL_MEDIAN so that its arguments are macro-expanded
 * before being stringified with #.
 */
1932 #define REAL_MEDIAN(a,b,c)\
1933 "movq " #a ", %%mm0 \n\t"\
1934 "movq " #b ", %%mm2 \n\t"\
1935 "movq " #c ", %%mm1 \n\t"\
1936 "movq %%mm0, %%mm3 \n\t"\
1937 "movq %%mm1, %%mm4 \n\t"\
1938 "movq %%mm2, %%mm5 \n\t"\
1939 "psubusb %%mm1, %%mm3 \n\t"\
1940 "psubusb %%mm2, %%mm4 \n\t"\
1941 "psubusb %%mm0, %%mm5 \n\t"\
1942 "pcmpeqb %%mm7, %%mm3 \n\t"\
1943 "pcmpeqb %%mm7, %%mm4 \n\t"\
1944 "pcmpeqb %%mm7, %%mm5 \n\t"\
1945 "movq %%mm3, %%mm6 \n\t"\
1946 "pxor %%mm4, %%mm3 \n\t"\
1947 "pxor %%mm5, %%mm4 \n\t"\
1948 "pxor %%mm6, %%mm5 \n\t"\
1949 "por %%mm3, %%mm1 \n\t"\
1950 "por %%mm4, %%mm2 \n\t"\
1951 "por %%mm5, %%mm0 \n\t"\
1952 "pand %%mm2, %%mm0 \n\t"\
1953 "pand %%mm1, %%mm0 \n\t"\
1954 "movq %%mm0, " #b " \n\t"
1955 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
1957 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
1958 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
1959 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
1960 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
1963 :
"%"REG_a,
"%"REG_d
1965 #endif //TEMPLATE_PP_MMXEXT
1966 #else //TEMPLATE_PP_MMX
1972 for (y=0; y<4; y++){
1973 int a,
b,
c, d, e, f;
1980 colsrc[
stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
1985 #endif //TEMPLATE_PP_MMX
1995 "lea (%0, %1), %%"REG_a
" \n\t"
1998 "movq (%0), %%mm0 \n\t"
1999 "movq (%%"REG_a
"), %%mm1 \n\t"
2000 "movq %%mm0, %%mm2 \n\t"
2001 "punpcklbw %%mm1, %%mm0 \n\t"
2002 "punpckhbw %%mm1, %%mm2 \n\t"
2004 "movq (%%"REG_a
", %1), %%mm1 \n\t"
2005 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t"
2006 "movq %%mm1, %%mm4 \n\t"
2007 "punpcklbw %%mm3, %%mm1 \n\t"
2008 "punpckhbw %%mm3, %%mm4 \n\t"
2010 "movq %%mm0, %%mm3 \n\t"
2011 "punpcklwd %%mm1, %%mm0 \n\t"
2012 "punpckhwd %%mm1, %%mm3 \n\t"
2013 "movq %%mm2, %%mm1 \n\t"
2014 "punpcklwd %%mm4, %%mm2 \n\t"
2015 "punpckhwd %%mm4, %%mm1 \n\t"
2017 "movd %%mm0, 128(%2) \n\t"
2018 "psrlq $32, %%mm0 \n\t"
2019 "movd %%mm0, 144(%2) \n\t"
2020 "movd %%mm3, 160(%2) \n\t"
2021 "psrlq $32, %%mm3 \n\t"
2022 "movd %%mm3, 176(%2) \n\t"
2023 "movd %%mm3, 48(%3) \n\t"
2024 "movd %%mm2, 192(%2) \n\t"
2025 "movd %%mm2, 64(%3) \n\t"
2026 "psrlq $32, %%mm2 \n\t"
2027 "movd %%mm2, 80(%3) \n\t"
2028 "movd %%mm1, 96(%3) \n\t"
2029 "psrlq $32, %%mm1 \n\t"
2030 "movd %%mm1, 112(%3) \n\t"
2032 "lea (%%"REG_a
", %1, 4), %%"REG_a
" \n\t"
2034 "movq (%0, %1, 4), %%mm0 \n\t"
2035 "movq (%%"REG_a
"), %%mm1 \n\t"
2036 "movq %%mm0, %%mm2 \n\t"
2037 "punpcklbw %%mm1, %%mm0 \n\t"
2038 "punpckhbw %%mm1, %%mm2 \n\t"
2040 "movq (%%"REG_a
", %1), %%mm1 \n\t"
2041 "movq (%%"REG_a
", %1, 2), %%mm3 \n\t"
2042 "movq %%mm1, %%mm4 \n\t"
2043 "punpcklbw %%mm3, %%mm1 \n\t"
2044 "punpckhbw %%mm3, %%mm4 \n\t"
2046 "movq %%mm0, %%mm3 \n\t"
2047 "punpcklwd %%mm1, %%mm0 \n\t"
2048 "punpckhwd %%mm1, %%mm3 \n\t"
2049 "movq %%mm2, %%mm1 \n\t"
2050 "punpcklwd %%mm4, %%mm2 \n\t"
2051 "punpckhwd %%mm4, %%mm1 \n\t"
2053 "movd %%mm0, 132(%2) \n\t"
2054 "psrlq $32, %%mm0 \n\t"
2055 "movd %%mm0, 148(%2) \n\t"
2056 "movd %%mm3, 164(%2) \n\t"
2057 "psrlq $32, %%mm3 \n\t"
2058 "movd %%mm3, 180(%2) \n\t"
2059 "movd %%mm3, 52(%3) \n\t"
2060 "movd %%mm2, 196(%2) \n\t"
2061 "movd %%mm2, 68(%3) \n\t"
2062 "psrlq $32, %%mm2 \n\t"
2063 "movd %%mm2, 84(%3) \n\t"
2064 "movd %%mm1, 100(%3) \n\t"
2065 "psrlq $32, %%mm1 \n\t"
2066 "movd %%mm1, 116(%3) \n\t"
2069 ::
"r" (
src),
"r" ((
x86_reg)srcStride),
"r" (dst1),
"r" (dst2)
2080 "lea (%0, %1), %%"REG_a
" \n\t"
2081 "lea (%%"REG_a
",%1,4), %%"REG_d
" \n\t"
2084 "movq (%2), %%mm0 \n\t"
2085 "movq 16(%2), %%mm1 \n\t"
2086 "movq %%mm0, %%mm2 \n\t"
2087 "punpcklbw %%mm1, %%mm0 \n\t"
2088 "punpckhbw %%mm1, %%mm2 \n\t"
2090 "movq 32(%2), %%mm1 \n\t"
2091 "movq 48(%2), %%mm3 \n\t"
2092 "movq %%mm1, %%mm4 \n\t"
2093 "punpcklbw %%mm3, %%mm1 \n\t"
2094 "punpckhbw %%mm3, %%mm4 \n\t"
2096 "movq %%mm0, %%mm3 \n\t"
2097 "punpcklwd %%mm1, %%mm0 \n\t"
2098 "punpckhwd %%mm1, %%mm3 \n\t"
2099 "movq %%mm2, %%mm1 \n\t"
2100 "punpcklwd %%mm4, %%mm2 \n\t"
2101 "punpckhwd %%mm4, %%mm1 \n\t"
2103 "movd %%mm0, (%0) \n\t"
2104 "psrlq $32, %%mm0 \n\t"
2105 "movd %%mm0, (%%"REG_a
") \n\t"
2106 "movd %%mm3, (%%"REG_a
", %1) \n\t"
2107 "psrlq $32, %%mm3 \n\t"
2108 "movd %%mm3, (%%"REG_a
", %1, 2) \n\t"
2109 "movd %%mm2, (%0, %1, 4) \n\t"
2110 "psrlq $32, %%mm2 \n\t"
2111 "movd %%mm2, (%%"REG_d
") \n\t"
2112 "movd %%mm1, (%%"REG_d
", %1) \n\t"
2113 "psrlq $32, %%mm1 \n\t"
2114 "movd %%mm1, (%%"REG_d
", %1, 2) \n\t"
2117 "movq 64(%2), %%mm0 \n\t"
2118 "movq 80(%2), %%mm1 \n\t"
2119 "movq %%mm0, %%mm2 \n\t"
2120 "punpcklbw %%mm1, %%mm0 \n\t"
2121 "punpckhbw %%mm1, %%mm2 \n\t"
2123 "movq 96(%2), %%mm1 \n\t"
2124 "movq 112(%2), %%mm3 \n\t"
2125 "movq %%mm1, %%mm4 \n\t"
2126 "punpcklbw %%mm3, %%mm1 \n\t"
2127 "punpckhbw %%mm3, %%mm4 \n\t"
2129 "movq %%mm0, %%mm3 \n\t"
2130 "punpcklwd %%mm1, %%mm0 \n\t"
2131 "punpckhwd %%mm1, %%mm3 \n\t"
2132 "movq %%mm2, %%mm1 \n\t"
2133 "punpcklwd %%mm4, %%mm2 \n\t"
2134 "punpckhwd %%mm4, %%mm1 \n\t"
2136 "movd %%mm0, 4(%0) \n\t"
2137 "psrlq $32, %%mm0 \n\t"
2138 "movd %%mm0, 4(%%"REG_a
") \n\t"
2139 "movd %%mm3, 4(%%"REG_a
", %1) \n\t"
2140 "psrlq $32, %%mm3 \n\t"
2141 "movd %%mm3, 4(%%"REG_a
", %1, 2) \n\t"
2142 "movd %%mm2, 4(%0, %1, 4) \n\t"
2143 "psrlq $32, %%mm2 \n\t"
2144 "movd %%mm2, 4(%%"REG_d
") \n\t"
2145 "movd %%mm1, 4(%%"REG_d
", %1) \n\t"
2146 "psrlq $32, %%mm1 \n\t"
2147 "movd %%mm1, 4(%%"REG_d
", %1, 2) \n\t"
2149 ::
"r" (dst),
"r" ((
x86_reg)dstStride),
"r" (
src)
2150 :
"%"REG_a,
"%"REG_d
2153 #endif //TEMPLATE_PP_MMX
2156 #if !TEMPLATE_PP_ALTIVEC
2158 uint8_t *tempBlurred, uint32_t *tempBlurredPast,
const int *maxNoise)
2161 tempBlurredPast[127]= maxNoise[0];
2162 tempBlurredPast[128]= maxNoise[1];
2163 tempBlurredPast[129]= maxNoise[2];
2165 #define FAST_L2_DIFF
2167 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
2169 "lea (%2, %2, 2), %%"REG_a
" \n\t"
2170 "lea (%2, %2, 4), %%"REG_d
" \n\t"
2171 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t"
2175 #ifdef L1_DIFF //needs mmx2
2176 "movq (%0), %%mm0 \n\t"
2177 "psadbw (%1), %%mm0 \n\t"
2178 "movq (%0, %2), %%mm1 \n\t"
2179 "psadbw (%1, %2), %%mm1 \n\t"
2180 "movq (%0, %2, 2), %%mm2 \n\t"
2181 "psadbw (%1, %2, 2), %%mm2 \n\t"
2182 "movq (%0, %%"REG_a
"), %%mm3 \n\t"
2183 "psadbw (%1, %%"REG_a
"), %%mm3 \n\t"
2185 "movq (%0, %2, 4), %%mm4 \n\t"
2186 "paddw %%mm1, %%mm0 \n\t"
2187 "psadbw (%1, %2, 4), %%mm4 \n\t"
2188 "movq (%0, %%"REG_d
"), %%mm5 \n\t"
2189 "paddw %%mm2, %%mm0 \n\t"
2190 "psadbw (%1, %%"REG_d
"), %%mm5 \n\t"
2191 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t"
2192 "paddw %%mm3, %%mm0 \n\t"
2193 "psadbw (%1, %%"REG_a
", 2), %%mm6 \n\t"
2194 "movq (%0, %%"REG_c
"), %%mm7 \n\t"
2195 "paddw %%mm4, %%mm0 \n\t"
2196 "psadbw (%1, %%"REG_c
"), %%mm7 \n\t"
2197 "paddw %%mm5, %%mm6 \n\t"
2198 "paddw %%mm7, %%mm6 \n\t"
2199 "paddw %%mm6, %%mm0 \n\t"
2201 #if defined (FAST_L2_DIFF)
2202 "pcmpeqb %%mm7, %%mm7 \n\t"
2203 "movq "MANGLE(b80)
", %%mm6 \n\t"
2204 "pxor %%mm0, %%mm0 \n\t"
/*
 * REAL_L2_DIFF_CORE(a, b), FAST_L2_DIFF variant: folds one pair of 8-byte rows
 * into the running difference accumulator in %%mm0 without widening bytes to
 * words first.
 *
 * Visible steps:
 *   %%mm2 = b XOR %%mm7            (%%mm7 is all ones here, so this is ~b)
 *   %%mm5 = PAVGB(~b, a)           (byte-wise rounded average)
 *   %%mm5 += %%mm6                 (wrapping byte add of the b80 constant)
 *   %%mm2 = copy of %%mm5; %%mm5 <<= 8 within each 16-bit word
 *   pmaddwd x,x on both registers  (square each signed word, add adjacent pairs)
 *   %%mm0 += (sum of the two products) >> 14, per 32-bit lane
 * NOTE(review): this presumably approximates the sum of squared differences
 * between a and b; b80 is defined outside this listing (the name suggests
 * 0x80 in every byte) - confirm both.
 *
 * Preconditions set up by the preceding lines: %%mm7 = all ones (pcmpeqb of a
 * register with itself), %%mm6 = b80, %%mm0 = 0.
 * Clobbers: %%mm2, %%mm5. Accumulates into the two dword lanes of %%mm0.
 */
2205 #define REAL_L2_DIFF_CORE(a, b)\
2206 "movq " #a ", %%mm5 \n\t"\
2207 "movq " #b ", %%mm2 \n\t"\
2208 "pxor %%mm7, %%mm2 \n\t"\
2209 PAVGB(%%mm2, %%mm5)\
2210 "paddb %%mm6, %%mm5 \n\t"\
2211 "movq %%mm5, %%mm2 \n\t"\
2212 "psllw $8, %%mm5 \n\t"\
2213 "pmaddwd %%mm5, %%mm5 \n\t"\
2214 "pmaddwd %%mm2, %%mm2 \n\t"\
2215 "paddd %%mm2, %%mm5 \n\t"\
2216 "psrld $14, %%mm5 \n\t"\
2217 "paddd %%mm5, %%mm0 \n\t"
2219 #else //defined (FAST_L2_DIFF)
2220 "pxor %%mm7, %%mm7 \n\t"
2221 "pxor %%mm0, %%mm0 \n\t"
/*
 * REAL_L2_DIFF_CORE(a, b), exact variant (used when FAST_L2_DIFF is not
 * defined): adds the sum of squared byte differences of the 8-byte rows a and
 * b to the accumulator in %%mm0.
 *
 * Visible steps:
 *   a and b are zero-extended to 16-bit words (low half in %%mm5/%%mm2, high
 *   half in %%mm1/%%mm3) using %%mm7 as the zero source;
 *   psubw forms the signed differences a - b;
 *   pmaddwd x,x squares each difference and adds adjacent pairs into dwords;
 *   both halves are summed and added to %%mm0 with paddd.
 * %%mm0 therefore carries two 32-bit partial sums; the code after the
 * L2_DIFF_CORE invocations folds them with psrlq $32 / paddd.
 *
 * Preconditions set up by the preceding lines: %%mm7 = 0, %%mm0 = 0.
 * Clobbers: %%mm1, %%mm2, %%mm3, %%mm5.
 */
2222 #define REAL_L2_DIFF_CORE(a, b)\
2223 "movq " #a ", %%mm5 \n\t"\
2224 "movq " #b ", %%mm2 \n\t"\
2225 "movq %%mm5, %%mm1 \n\t"\
2226 "movq %%mm2, %%mm3 \n\t"\
2227 "punpcklbw %%mm7, %%mm5 \n\t"\
2228 "punpckhbw %%mm7, %%mm1 \n\t"\
2229 "punpcklbw %%mm7, %%mm2 \n\t"\
2230 "punpckhbw %%mm7, %%mm3 \n\t"\
2231 "psubw %%mm2, %%mm5 \n\t"\
2232 "psubw %%mm3, %%mm1 \n\t"\
2233 "pmaddwd %%mm5, %%mm5 \n\t"\
2234 "pmaddwd %%mm1, %%mm1 \n\t"\
2235 "paddd %%mm1, %%mm5 \n\t"\
2236 "paddd %%mm5, %%mm0 \n\t"
2238 #endif //defined (FAST_L2_DIFF)
/*
 * Forwarding wrapper: REAL_L2_DIFF_CORE stringifies its parameters with #,
 * which suppresses macro expansion of the arguments. Going through this extra
 * level lets the arguments be expanded first. The invocations below pass
 * operands such as (%0, %%REGa); REGa is presumably a macro defined elsewhere.
 */
2240 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
2242 L2_DIFF_CORE((%0) , (%1))
2243 L2_DIFF_CORE((%0, %2) , (%1, %2))
2244 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
2245 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
2246 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
2247 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
2248 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
2249 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
2253 "movq %%mm0, %%mm4 \n\t"
2254 "psrlq $32, %%mm0 \n\t"
2255 "paddd %%mm0, %%mm4 \n\t"
2256 "movd %%mm4, %%ecx \n\t"
2257 "shll $2, %%ecx \n\t"
2258 "mov %3, %%"REG_d
" \n\t"
2259 "addl -4(%%"REG_d
"), %%ecx \n\t"
2260 "addl 4(%%"REG_d
"), %%ecx \n\t"
2261 "addl -1024(%%"REG_d
"), %%ecx \n\t"
2262 "addl $4, %%ecx \n\t"
2263 "addl 1024(%%"REG_d
"), %%ecx \n\t"
2264 "shrl $3, %%ecx \n\t"
2265 "movl %%ecx, (%%"REG_d
") \n\t"
2270 "cmpl 512(%%"REG_d
"), %%ecx \n\t"
2272 "cmpl 516(%%"REG_d
"), %%ecx \n\t"
2275 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t"
2276 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t"
2277 "movq (%0), %%mm0 \n\t"
2278 "movq (%0, %2), %%mm1 \n\t"
2279 "movq (%0, %2, 2), %%mm2 \n\t"
2280 "movq (%0, %%"REG_a
"), %%mm3 \n\t"
2281 "movq (%0, %2, 4), %%mm4 \n\t"
2282 "movq (%0, %%"REG_d
"), %%mm5 \n\t"
2283 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t"
2284 "movq (%0, %%"REG_c
"), %%mm7 \n\t"
2285 "movq %%mm0, (%1) \n\t"
2286 "movq %%mm1, (%1, %2) \n\t"
2287 "movq %%mm2, (%1, %2, 2) \n\t"
2288 "movq %%mm3, (%1, %%"REG_a
") \n\t"
2289 "movq %%mm4, (%1, %2, 4) \n\t"
2290 "movq %%mm5, (%1, %%"REG_d
") \n\t"
2291 "movq %%mm6, (%1, %%"REG_a
", 2) \n\t"
2292 "movq %%mm7, (%1, %%"REG_c
") \n\t"
2296 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t"
2297 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t"
2298 "movq (%0), %%mm0 \n\t"
2300 "movq (%0, %2), %%mm1 \n\t"
2301 PAVGB((%1, %2), %%mm1)
2302 "movq (%0, %2, 2), %%mm2 \n\t"
2303 PAVGB((%1, %2, 2), %%mm2)
2304 "movq (%0, %%"REG_a
"), %%mm3 \n\t"
2305 PAVGB((%1, %%REGa), %%mm3)
2306 "movq (%0, %2, 4), %%mm4 \n\t"
2307 PAVGB((%1, %2, 4), %%mm4)
2308 "movq (%0, %%"REG_d
"), %%mm5 \n\t"
2309 PAVGB((%1, %%REGd), %%mm5)
2310 "movq (%0, %%"REG_a
", 2), %%mm6 \n\t"
2311 PAVGB((%1, %%REGa, 2), %%mm6)
2312 "movq (%0, %%"REG_c
"), %%mm7 \n\t"
2313 PAVGB((%1, %%REGc), %%mm7)
2314 "movq %%mm0, (%1) \n\t"
2315 "movq %%mm1, (%1, %2) \n\t"
2316 "movq %%mm2, (%1, %2, 2) \n\t"
2317 "movq %%mm3, (%1, %%"REG_a
") \n\t"
2318 "movq %%mm4, (%1, %2, 4) \n\t"
2319 "movq %%mm5, (%1, %%"REG_d
") \n\t"
2320 "movq %%mm6, (%1, %%"REG_a
", 2) \n\t"
2321 "movq %%mm7, (%1, %%"REG_c
") \n\t"
2322 "movq %%mm0, (%0) \n\t"
2323 "movq %%mm1, (%0, %2) \n\t"
2324 "movq %%mm2, (%0, %2, 2) \n\t"
2325 "movq %%mm3, (%0, %%"REG_a
") \n\t"
2326 "movq %%mm4, (%0, %2, 4) \n\t"
2327 "movq %%mm5, (%0, %%"REG_d
") \n\t"
2328 "movq %%mm6, (%0, %%"REG_a
", 2) \n\t"
2329 "movq %%mm7, (%0, %%"REG_c
") \n\t"
2333 "cmpl 508(%%"REG_d
"), %%ecx \n\t"
2336 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t"
2337 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t"
2338 "movq (%0), %%mm0 \n\t"
2339 "movq (%0, %2), %%mm1 \n\t"
2340 "movq (%0, %2, 2), %%mm2 \n\t"
2341 "movq (%0, %%"REG_a
"), %%mm3 \n\t"
2342 "movq (%1), %%mm4 \n\t"
2343 "movq (%1, %2), %%mm5 \n\t"
2344 "movq (%1, %2, 2), %%mm6 \n\t"
2345 "movq (%1, %%"REG_a
"), %%mm7 \n\t"
2354 "movq %%mm0, (%1) \n\t"
2355 "movq %%mm1, (%1, %2) \n\t"
2356 "movq %%mm2, (%1, %2, 2) \n\t"
2357 "movq %%mm3, (%1, %%"REG_a
") \n\t"
2358 "movq %%mm0, (%0) \n\t"
2359 "movq %%mm1, (%0, %2) \n\t"
2360 "movq %%mm2, (%0, %2, 2) \n\t"
2361 "movq %%mm3, (%0, %%"REG_a
") \n\t"
2363 "movq (%0, %2, 4), %%mm0 \n\t"
2364 "movq (%0, %%"REG_d
"), %%mm1 \n\t"
2365 "movq (%0, %%"REG_a
", 2), %%mm2 \n\t"
2366 "movq (%0, %%"REG_c
"), %%mm3 \n\t"
2367 "movq (%1, %2, 4), %%mm4 \n\t"
2368 "movq (%1, %%"REG_d
"), %%mm5 \n\t"
2369 "movq (%1, %%"REG_a
", 2), %%mm6 \n\t"
2370 "movq (%1, %%"REG_c
"), %%mm7 \n\t"
2379 "movq %%mm0, (%1, %2, 4) \n\t"
2380 "movq %%mm1, (%1, %%"REG_d
") \n\t"
2381 "movq %%mm2, (%1, %%"REG_a
", 2) \n\t"
2382 "movq %%mm3, (%1, %%"REG_c
") \n\t"
2383 "movq %%mm0, (%0, %2, 4) \n\t"
2384 "movq %%mm1, (%0, %%"REG_d
") \n\t"
2385 "movq %%mm2, (%0, %%"REG_a
", 2) \n\t"
2386 "movq %%mm3, (%0, %%"REG_c
") \n\t"
2390 "lea (%%"REG_a
", %2, 2), %%"REG_d
" \n\t"
2391 "lea (%%"REG_d
", %2, 2), %%"REG_c
" \n\t"
2392 "movq (%0), %%mm0 \n\t"
2393 "movq (%0, %2), %%mm1 \n\t"
2394 "movq (%0, %2, 2), %%mm2 \n\t"
2395 "movq (%0, %%"REG_a
"), %%mm3 \n\t"
2396 "movq (%1), %%mm4 \n\t"
2397 "movq (%1, %2), %%mm5 \n\t"
2398 "movq (%1, %2, 2), %%mm6 \n\t"
2399 "movq (%1, %%"REG_a
"), %%mm7 \n\t"
2412 "movq %%mm0, (%1) \n\t"
2413 "movq %%mm1, (%1, %2) \n\t"
2414 "movq %%mm2, (%1, %2, 2) \n\t"
2415 "movq %%mm3, (%1, %%"REG_a
") \n\t"
2416 "movq %%mm0, (%0) \n\t"
2417 "movq %%mm1, (%0, %2) \n\t"
2418 "movq %%mm2, (%0, %2, 2) \n\t"
2419 "movq %%mm3, (%0, %%"REG_a
") \n\t"
2421 "movq (%0, %2, 4), %%mm0 \n\t"
2422 "movq (%0, %%"REG_d
"), %%mm1 \n\t"
2423 "movq (%0, %%"REG_a
", 2), %%mm2 \n\t"
2424 "movq (%0, %%"REG_c
"), %%mm3 \n\t"
2425 "movq (%1, %2, 4), %%mm4 \n\t"
2426 "movq (%1, %%"REG_d
"), %%mm5 \n\t"
2427 "movq (%1, %%"REG_a
", 2), %%mm6 \n\t"
2428 "movq (%1, %%"REG_c
"), %%mm7 \n\t"
2441 "movq %%mm0, (%1, %2, 4) \n\t"
2442 "movq %%mm1, (%1, %%"REG_d
") \n\t"
2443 "movq %%mm2, (%1, %%"REG_a
", 2) \n\t"
2444 "movq %%mm3, (%1, %%"REG_c
") \n\t"
2445 "movq %%mm0, (%0, %2, 4) \n\t"
2446 "movq %%mm1, (%0, %%"REG_d
") \n\t"
2447 "movq %%mm2, (%0, %%"REG_a
", 2) \n\t"
2448 "movq %%mm3, (%0, %%"REG_c
") \n\t"
2452 ::
"r" (
src),
"r" (tempBlurred),
"r"((
x86_reg)stride),
"m" (tempBlurredPast)
2454 :
"%"REG_a,
"%"REG_d,
"%"REG_c,
"memory"
2456 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
2466 int ref= tempBlurred[ x + y*
stride ];
2467 int cur= src[ x + y*
stride ];
2479 +(*(tempBlurredPast-256))
2480 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
2481 +(*(tempBlurredPast+256))
2493 if(d > maxNoise[1]){
2494 if(d < maxNoise[2]){
2498 int ref= tempBlurred[ x + y*
stride ];
2499 int cur= src[ x + y*
stride ];
2500 tempBlurred[ x + y*
stride ]=
2514 if(d < maxNoise[0]){
2518 int ref= tempBlurred[ x + y*
stride ];
2519 int cur= src[ x + y*
stride ];
2520 tempBlurred[ x + y*
stride ]=
2522 (ref*7 + cur + 4)>>3;
2529 int ref= tempBlurred[ x + y*
stride ];
2530 int cur= src[ x + y*
stride ];
2531 tempBlurred[ x + y*
stride ]=
2533 (ref*3 + cur + 2)>>2;
2539 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
2541 #endif //TEMPLATE_PP_ALTIVEC
2548 int64_t dc_mask, eq_mask, both_masks;
2549 int64_t sums[10*8*2];
2553 "movq %0, %%mm7 \n\t"
2554 "movq %1, %%mm6 \n\t"
2555 : :
"m" (c->mmxDcOffset[c->nonBQP]),
"m" (c->mmxDcThreshold[c->nonBQP])
2559 "lea (%2, %3), %%"REG_a
" \n\t"
2563 "movq (%2), %%mm0 \n\t"
2564 "movq (%%"REG_a
"), %%mm1 \n\t"
2565 "movq %%mm1, %%mm3 \n\t"
2566 "movq %%mm1, %%mm4 \n\t"
2567 "psubb %%mm1, %%mm0 \n\t"
2568 "paddb %%mm7, %%mm0 \n\t"
2569 "pcmpgtb %%mm6, %%mm0 \n\t"
2571 "movq (%%"REG_a
",%3), %%mm2 \n\t"
2572 PMAXUB(%%mm2, %%mm4)
2573 PMINUB(%%mm2, %%mm3, %%mm5)
2574 "psubb %%mm2, %%mm1 \
n\t"
2575 "paddb %%mm7, %%mm1 \
n\t"
2576 "pcmpgtb %%mm6, %%mm1 \
n\t"
2577 "paddb %%mm1, %%mm0 \
n\t"
2579 "movq (%%"REG_a", %3, 2), %%mm1 \
n\t"
2580 PMAXUB(%%mm1, %%mm4)
2581 PMINUB(%%mm1, %%mm3, %%mm5)
2582 "psubb %%mm1, %%mm2 \
n\t"
2583 "paddb %%mm7, %%mm2 \
n\t"
2584 "pcmpgtb %%mm6, %%mm2 \
n\t"
2585 "paddb %%mm2, %%mm0 \
n\t"
2587 "lea (%%"REG_a", %3, 4), %%"REG_a" \
n\t"
2589 "movq (%2, %3, 4), %%mm2 \
n\t"
2590 PMAXUB(%%mm2, %%mm4)
2591 PMINUB(%%mm2, %%mm3, %%mm5)
2592 "psubb %%mm2, %%mm1 \
n\t"
2593 "paddb %%mm7, %%mm1 \
n\t"
2594 "pcmpgtb %%mm6, %%mm1 \
n\t"
2595 "paddb %%mm1, %%mm0 \
n\t"
2597 "movq (%%"REG_a"), %%mm1 \
n\t"
2598 PMAXUB(%%mm1, %%mm4)
2599 PMINUB(%%mm1, %%mm3, %%mm5)
2600 "psubb %%mm1, %%mm2 \
n\t"
2601 "paddb %%mm7, %%mm2 \
n\t"
2602 "pcmpgtb %%mm6, %%mm2 \
n\t"
2603 "paddb %%mm2, %%mm0 \
n\t"
2605 "movq (%%"REG_a", %3), %%mm2 \
n\t"
2606 PMAXUB(%%mm2, %%mm4)
2607 PMINUB(%%mm2, %%mm3, %%mm5)
2608 "psubb %%mm2, %%mm1 \
n\t"
2609 "paddb %%mm7, %%mm1 \
n\t"
2610 "pcmpgtb %%mm6, %%mm1 \
n\t"
2611 "paddb %%mm1, %%mm0 \
n\t"
2613 "movq (%%"REG_a", %3, 2), %%mm1 \
n\t"
2614 PMAXUB(%%mm1, %%mm4)
2615 PMINUB(%%mm1, %%mm3, %%mm5)
2616 "psubb %%mm1, %%mm2 \
n\t"
2617 "paddb %%mm7, %%mm2 \
n\t"
2618 "pcmpgtb %%mm6, %%mm2 \
n\t"
2619 "paddb %%mm2, %%mm0 \
n\t"
2621 "movq (%2, %3, 8), %%mm2 \
n\t"
2622 PMAXUB(%%mm2, %%mm4)
2623 PMINUB(%%mm2, %%mm3, %%mm5)
2624 "psubb %%mm2, %%mm1 \
n\t"
2625 "paddb %%mm7, %%mm1 \
n\t"
2626 "pcmpgtb %%mm6, %%mm1 \
n\t"
2627 "paddb %%mm1, %%mm0 \
n\t"
2629 "movq (%%"REG_a", %3, 4), %%mm1 \
n\t"
2630 "psubb %%mm1, %%mm2 \
n\t"
2631 "paddb %%mm7, %%mm2 \
n\t"
2632 "pcmpgtb %%mm6, %%mm2 \
n\t"
2633 "paddb %%mm2, %%mm0 \
n\t"
2634 "psubusb %%mm3, %%mm4 \
n\t"
2636 "pxor %%mm6, %%mm6 \
n\t"
2637 "movq %4, %%mm7 \
n\t"
2638 "paddusb %%mm7, %%mm7 \
n\t"
2639 "psubusb %%mm4, %%mm7 \
n\t"
2640 "pcmpeqb %%mm6, %%mm7 \
n\t"
2641 "pcmpeqb %%mm6, %%mm7 \
n\t"
2642 "movq %%mm7, %1 \
n\t"
2644 "movq %5, %%mm7 \
n\t"
2645 "punpcklbw %%mm7, %%mm7 \
n\t"
2646 "punpcklbw %%mm7, %%mm7 \
n\t"
2647 "punpcklbw %%mm7, %%mm7 \
n\t"
2648 "psubb %%mm0, %%mm6 \
n\t"
2649 "pcmpgtb %%mm7, %%mm6 \
n\t"
2650 "movq %%mm6, %0 \
n\t"
2652 : "=
m" (eq_mask), "=
m" (dc_mask)
2653 : "
r" (src), "
r" ((
x86_reg)step), "
m" (c->pQPb), "
m"(c->ppMode.flatnessThreshold)
2657 both_masks = dc_mask & eq_mask;
2661 int64_t *temp_sums= sums;
2664 "movq %2, %%mm0 \n\t"
2665 "pxor %%mm4, %%mm4 \n\t"
2667 "movq (%0), %%mm6 \n\t"
2668 "movq (%0, %1), %%mm5 \n\t"
2669 "movq %%mm5, %%mm1 \n\t"
2670 "movq %%mm6, %%mm2 \n\t"
2671 "psubusb %%mm6, %%mm5 \n\t"
2672 "psubusb %%mm1, %%mm2 \n\t"
2673 "por %%mm5, %%mm2 \n\t"
2674 "psubusb %%mm2, %%mm0 \n\t"
2675 "pcmpeqb %%mm4, %%mm0 \n\t"
2677 "pxor %%mm6, %%mm1 \n\t"
2678 "pand %%mm0, %%mm1 \n\t"
2679 "pxor %%mm1, %%mm6 \n\t"
2682 "movq (%0, %1, 8), %%mm5 \n\t"
2684 "movq (%0, %1, 8), %%mm7 \n\t"
2685 "movq %%mm5, %%mm1 \n\t"
2686 "movq %%mm7, %%mm2 \n\t"
2687 "psubusb %%mm7, %%mm5 \n\t"
2688 "psubusb %%mm1, %%mm2 \n\t"
2689 "por %%mm5, %%mm2 \n\t"
2690 "movq %2, %%mm0 \n\t"
2691 "psubusb %%mm2, %%mm0 \n\t"
2692 "pcmpeqb %%mm4, %%mm0 \n\t"
2694 "pxor %%mm7, %%mm1 \n\t"
2695 "pand %%mm0, %%mm1 \n\t"
2696 "pxor %%mm1, %%mm7 \n\t"
2698 "movq %%mm6, %%mm5 \n\t"
2699 "punpckhbw %%mm4, %%mm6 \n\t"
2700 "punpcklbw %%mm4, %%mm5 \n\t"
2703 "movq %%mm5, %%mm0 \n\t"
2704 "movq %%mm6, %%mm1 \n\t"
2705 "psllw $2, %%mm0 \n\t"
2706 "psllw $2, %%mm1 \n\t"
2707 "paddw "MANGLE(w04)
", %%mm0 \n\t"
2708 "paddw "MANGLE(w04)
", %%mm1 \n\t"
2711 "movq (%0), %%mm2 \n\t"\
2712 "movq (%0), %%mm3 \n\t"\
2714 "punpcklbw %%mm4, %%mm2 \n\t"\
2715 "punpckhbw %%mm4, %%mm3 \n\t"\
2716 "paddw %%mm2, %%mm0 \n\t"\
2717 "paddw %%mm3, %%mm1 \n\t"
2720 "movq (%0), %%mm2 \n\t"\
2721 "movq (%0), %%mm3 \n\t"\
2723 "punpcklbw %%mm4, %%mm2 \n\t"\
2724 "punpckhbw %%mm4, %%mm3 \n\t"\
2725 "psubw %%mm2, %%mm0 \n\t"\
2726 "psubw %%mm3, %%mm1 \n\t"
2732 "movq %%mm0, (%3) \n\t"
2733 "movq %%mm1, 8(%3) \n\t"
2736 "psubw %%mm5, %%mm0 \n\t"
2737 "psubw %%mm6, %%mm1 \n\t"
2738 "movq %%mm0, 16(%3) \n\t"
2739 "movq %%mm1, 24(%3) \n\t"
2742 "psubw %%mm5, %%mm0 \n\t"
2743 "psubw %%mm6, %%mm1 \n\t"
2744 "movq %%mm0, 32(%3) \n\t"
2745 "movq %%mm1, 40(%3) \n\t"
2748 "psubw %%mm5, %%mm0 \n\t"
2749 "psubw %%mm6, %%mm1 \n\t"
2750 "movq %%mm0, 48(%3) \n\t"
2751 "movq %%mm1, 56(%3) \n\t"
2754 "psubw %%mm5, %%mm0 \n\t"
2755 "psubw %%mm6, %%mm1 \n\t"
2756 "movq %%mm0, 64(%3) \n\t"
2757 "movq %%mm1, 72(%3) \n\t"
2759 "movq %%mm7, %%mm6 \n\t"
2760 "punpckhbw %%mm4, %%mm7 \n\t"
2761 "punpcklbw %%mm4, %%mm6 \n\t"
2767 "movq %%mm0, 80(%3) \n\t"
2768 "movq %%mm1, 88(%3) \n\t"
2771 "paddw %%mm6, %%mm0 \n\t"
2772 "paddw %%mm7, %%mm1 \n\t"
2773 "movq %%mm0, 96(%3) \n\t"
2774 "movq %%mm1, 104(%3) \n\t"
2777 "paddw %%mm6, %%mm0 \n\t"
2778 "paddw %%mm7, %%mm1 \n\t"
2779 "movq %%mm0, 112(%3) \n\t"
2780 "movq %%mm1, 120(%3) \n\t"
2783 "paddw %%mm6, %%mm0 \n\t"
2784 "paddw %%mm7, %%mm1 \n\t"
2785 "movq %%mm0, 128(%3) \n\t"
2786 "movq %%mm1, 136(%3) \n\t"
2789 "paddw %%mm6, %%mm0 \n\t"
2790 "paddw %%mm7, %%mm1 \n\t"
2791 "movq %%mm0, 144(%3) \n\t"
2792 "movq %%mm1, 152(%3) \n\t"
2797 :
"r" ((
x86_reg)step),
"m" (c->pQPb),
"r"(sums),
"g"(src)
2804 "movq %4, %%mm6 \n\t"
2805 "pcmpeqb %%mm5, %%mm5 \n\t"
2806 "pxor %%mm6, %%mm5 \n\t"
2807 "pxor %%mm7, %%mm7 \n\t"
2810 "movq (%1), %%mm0 \n\t"
2811 "movq 8(%1), %%mm1 \n\t"
2812 "paddw 32(%1), %%mm0 \n\t"
2813 "paddw 40(%1), %%mm1 \n\t"
2814 "movq (%0, %3), %%mm2 \n\t"
2815 "movq %%mm2, %%mm3 \n\t"
2816 "movq %%mm2, %%mm4 \n\t"
2817 "punpcklbw %%mm7, %%mm2 \n\t"
2818 "punpckhbw %%mm7, %%mm3 \n\t"
2819 "paddw %%mm2, %%mm0 \n\t"
2820 "paddw %%mm3, %%mm1 \n\t"
2821 "paddw %%mm2, %%mm0 \n\t"
2822 "paddw %%mm3, %%mm1 \n\t"
2823 "psrlw $4, %%mm0 \n\t"
2824 "psrlw $4, %%mm1 \n\t"
2825 "packuswb %%mm1, %%mm0 \n\t"
2826 "pand %%mm6, %%mm0 \n\t"
2827 "pand %%mm5, %%mm4 \n\t"
2828 "por %%mm4, %%mm0 \n\t"
2829 "movq %%mm0, (%0, %3) \n\t"
2834 :
"+r"(
offset),
"+r"(temp_sums)
2840 if(eq_mask != -1LL){
2844 "pxor %%mm7, %%mm7 \n\t"
2848 "movq (%0), %%mm0 \n\t"
2849 "movq %%mm0, %%mm1 \n\t"
2850 "punpcklbw %%mm7, %%mm0 \n\t"
2851 "punpckhbw %%mm7, %%mm1 \n\t"
2853 "movq (%0, %1), %%mm2 \n\t"
2854 "lea (%0, %1, 2), %%"REG_a
" \n\t"
2855 "movq %%mm2, %%mm3 \n\t"
2856 "punpcklbw %%mm7, %%mm2 \n\t"
2857 "punpckhbw %%mm7, %%mm3 \n\t"
2859 "movq (%%"REG_a
"), %%mm4 \n\t"
2860 "movq %%mm4, %%mm5 \n\t"
2861 "punpcklbw %%mm7, %%mm4 \n\t"
2862 "punpckhbw %%mm7, %%mm5 \n\t"
2864 "paddw %%mm0, %%mm0 \n\t"
2865 "paddw %%mm1, %%mm1 \n\t"
2866 "psubw %%mm4, %%mm2 \n\t"
2867 "psubw %%mm5, %%mm3 \n\t"
2868 "psubw %%mm2, %%mm0 \n\t"
2869 "psubw %%mm3, %%mm1 \n\t"
2871 "psllw $2, %%mm2 \n\t"
2872 "psllw $2, %%mm3 \n\t"
2873 "psubw %%mm2, %%mm0 \n\t"
2874 "psubw %%mm3, %%mm1 \n\t"
2876 "movq (%%"REG_a
", %1), %%mm2 \n\t"
2877 "movq %%mm2, %%mm3 \n\t"
2878 "punpcklbw %%mm7, %%mm2 \n\t"
2879 "punpckhbw %%mm7, %%mm3 \n\t"
2881 "psubw %%mm2, %%mm0 \n\t"
2882 "psubw %%mm3, %%mm1 \n\t"
2883 "psubw %%mm2, %%mm0 \n\t"
2884 "psubw %%mm3, %%mm1 \n\t"
2885 "movq %%mm0, (%4) \n\t"
2886 "movq %%mm1, 8(%4) \n\t"
2888 "movq (%%"REG_a
", %1, 2), %%mm0 \n\t"
2889 "movq %%mm0, %%mm1 \n\t"
2890 "punpcklbw %%mm7, %%mm0 \n\t"
2891 "punpckhbw %%mm7, %%mm1 \n\t"
2893 "psubw %%mm0, %%mm2 \n\t"
2894 "psubw %%mm1, %%mm3 \n\t"
2895 "movq %%mm2, 16(%4) \n\t"
2896 "movq %%mm3, 24(%4) \n\t"
2897 "paddw %%mm4, %%mm4 \n\t"
2898 "paddw %%mm5, %%mm5 \n\t"
2899 "psubw %%mm2, %%mm4 \n\t"
2900 "psubw %%mm3, %%mm5 \n\t"
2902 "lea (%%"REG_a
", %1), %0 \n\t"
2903 "psllw $2, %%mm2 \n\t"
2904 "psllw $2, %%mm3 \n\t"
2905 "psubw %%mm2, %%mm4 \n\t"
2906 "psubw %%mm3, %%mm5 \n\t"
2908 "movq (%0, %1, 2), %%mm2 \n\t"
2909 "movq %%mm2, %%mm3 \n\t"
2910 "punpcklbw %%mm7, %%mm2 \n\t"
2911 "punpckhbw %%mm7, %%mm3 \n\t"
2912 "psubw %%mm2, %%mm4 \n\t"
2913 "psubw %%mm3, %%mm5 \n\t"
2914 "psubw %%mm2, %%mm4 \n\t"
2915 "psubw %%mm3, %%mm5 \n\t"
2917 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t"
2918 "punpcklbw %%mm7, %%mm6 \n\t"
2919 "psubw %%mm6, %%mm2 \n\t"
2920 "movq (%%"REG_a
", %1, 4), %%mm6 \n\t"
2921 "punpckhbw %%mm7, %%mm6 \n\t"
2922 "psubw %%mm6, %%mm3 \n\t"
2924 "paddw %%mm0, %%mm0 \n\t"
2925 "paddw %%mm1, %%mm1 \n\t"
2926 "psubw %%mm2, %%mm0 \n\t"
2927 "psubw %%mm3, %%mm1 \n\t"
2929 "psllw $2, %%mm2 \n\t"
2930 "psllw $2, %%mm3 \n\t"
2931 "psubw %%mm2, %%mm0 \n\t"
2932 "psubw %%mm3, %%mm1 \n\t"
2934 "movq (%0, %1, 4), %%mm2 \n\t"
2935 "movq %%mm2, %%mm3 \n\t"
2936 "punpcklbw %%mm7, %%mm2 \n\t"
2937 "punpckhbw %%mm7, %%mm3 \n\t"
2939 "paddw %%mm2, %%mm2 \n\t"
2940 "paddw %%mm3, %%mm3 \n\t"
2941 "psubw %%mm2, %%mm0 \n\t"
2942 "psubw %%mm3, %%mm1 \n\t"
2944 "movq (%4), %%mm2 \n\t"
2945 "movq 8(%4), %%mm3 \n\t"
2947 #if TEMPLATE_PP_MMXEXT
2948 "movq %%mm7, %%mm6 \n\t"
2949 "psubw %%mm0, %%mm6 \n\t"
2950 "pmaxsw %%mm6, %%mm0 \n\t"
2951 "movq %%mm7, %%mm6 \n\t"
2952 "psubw %%mm1, %%mm6 \n\t"
2953 "pmaxsw %%mm6, %%mm1 \n\t"
2954 "movq %%mm7, %%mm6 \n\t"
2955 "psubw %%mm2, %%mm6 \n\t"
2956 "pmaxsw %%mm6, %%mm2 \n\t"
2957 "movq %%mm7, %%mm6 \n\t"
2958 "psubw %%mm3, %%mm6 \n\t"
2959 "pmaxsw %%mm6, %%mm3 \n\t"
2961 "movq %%mm7, %%mm6 \n\t"
2962 "pcmpgtw %%mm0, %%mm6 \n\t"
2963 "pxor %%mm6, %%mm0 \n\t"
2964 "psubw %%mm6, %%mm0 \n\t"
2965 "movq %%mm7, %%mm6 \n\t"
2966 "pcmpgtw %%mm1, %%mm6 \n\t"
2967 "pxor %%mm6, %%mm1 \n\t"
2968 "psubw %%mm6, %%mm1 \n\t"
2969 "movq %%mm7, %%mm6 \n\t"
2970 "pcmpgtw %%mm2, %%mm6 \n\t"
2971 "pxor %%mm6, %%mm2 \n\t"
2972 "psubw %%mm6, %%mm2 \n\t"
2973 "movq %%mm7, %%mm6 \n\t"
2974 "pcmpgtw %%mm3, %%mm6 \n\t"
2975 "pxor %%mm6, %%mm3 \n\t"
2976 "psubw %%mm6, %%mm3 \n\t"
2979 #if TEMPLATE_PP_MMXEXT
2980 "pminsw %%mm2, %%mm0 \n\t"
2981 "pminsw %%mm3, %%mm1 \n\t"
2983 "movq %%mm0, %%mm6 \n\t"
2984 "psubusw %%mm2, %%mm6 \n\t"
2985 "psubw %%mm6, %%mm0 \n\t"
2986 "movq %%mm1, %%mm6 \n\t"
2987 "psubusw %%mm3, %%mm6 \n\t"
2988 "psubw %%mm6, %%mm1 \n\t"
2991 "movd %2, %%mm2 \n\t"
2992 "punpcklbw %%mm7, %%mm2 \n\t"
2994 "movq %%mm7, %%mm6 \n\t"
2995 "pcmpgtw %%mm4, %%mm6 \n\t"
2996 "pxor %%mm6, %%mm4 \n\t"
2997 "psubw %%mm6, %%mm4 \n\t"
2998 "pcmpgtw %%mm5, %%mm7 \n\t"
2999 "pxor %%mm7, %%mm5 \n\t"
3000 "psubw %%mm7, %%mm5 \n\t"
3002 "psllw $3, %%mm2 \n\t"
3003 "movq %%mm2, %%mm3 \n\t"
3004 "pcmpgtw %%mm4, %%mm2 \n\t"
3005 "pcmpgtw %%mm5, %%mm3 \n\t"
3006 "pand %%mm2, %%mm4 \n\t"
3007 "pand %%mm3, %%mm5 \n\t"
3010 "psubusw %%mm0, %%mm4 \n\t"
3011 "psubusw %%mm1, %%mm5 \n\t"
3014 "movq "MANGLE(w05)
", %%mm2 \n\t"
3015 "pmullw %%mm2, %%mm4 \n\t"
3016 "pmullw %%mm2, %%mm5 \n\t"
3017 "movq "MANGLE(w20)
", %%mm2 \n\t"
3018 "paddw %%mm2, %%mm4 \n\t"
3019 "paddw %%mm2, %%mm5 \n\t"
3020 "psrlw $6, %%mm4 \n\t"
3021 "psrlw $6, %%mm5 \n\t"
3023 "movq 16(%4), %%mm0 \n\t"
3024 "movq 24(%4), %%mm1 \n\t"
3026 "pxor %%mm2, %%mm2 \n\t"
3027 "pxor %%mm3, %%mm3 \n\t"
3029 "pcmpgtw %%mm0, %%mm2 \n\t"
3030 "pcmpgtw %%mm1, %%mm3 \n\t"
3031 "pxor %%mm2, %%mm0 \n\t"
3032 "pxor %%mm3, %%mm1 \n\t"
3033 "psubw %%mm2, %%mm0 \n\t"
3034 "psubw %%mm3, %%mm1 \n\t"
3035 "psrlw $1, %%mm0 \n\t"
3036 "psrlw $1, %%mm1 \n\t"
3038 "pxor %%mm6, %%mm2 \n\t"
3039 "pxor %%mm7, %%mm3 \n\t"
3040 "pand %%mm2, %%mm4 \n\t"
3041 "pand %%mm3, %%mm5 \n\t"
3043 #if TEMPLATE_PP_MMXEXT
3044 "pminsw %%mm0, %%mm4 \n\t"
3045 "pminsw %%mm1, %%mm5 \n\t"
3047 "movq %%mm4, %%mm2 \n\t"
3048 "psubusw %%mm0, %%mm2 \n\t"
3049 "psubw %%mm2, %%mm4 \n\t"
3050 "movq %%mm5, %%mm2 \n\t"
3051 "psubusw %%mm1, %%mm2 \n\t"
3052 "psubw %%mm2, %%mm5 \n\t"
3054 "pxor %%mm6, %%mm4 \n\t"
3055 "pxor %%mm7, %%mm5 \n\t"
3056 "psubw %%mm6, %%mm4 \n\t"
3057 "psubw %%mm7, %%mm5 \n\t"
3058 "packsswb %%mm5, %%mm4 \n\t"
3059 "movq %3, %%mm1 \n\t"
3060 "pandn %%mm4, %%mm1 \n\t"
3061 "movq (%0), %%mm0 \n\t"
3062 "paddb %%mm1, %%mm0 \n\t"
3063 "movq %%mm0, (%0) \n\t"
3064 "movq (%0, %1), %%mm0 \n\t"
3065 "psubb %%mm1, %%mm0 \n\t"
3066 "movq %%mm0, (%0, %1) \n\t"
3069 :
"r" ((
x86_reg)step),
"m" (c->pQPb),
"m"(eq_mask),
"r"(tmp)
3081 #endif //TEMPLATE_PP_MMX
3090 #undef REAL_SCALED_CPY
// Tail of the blockCopy() parameter list followed by its body. The listing at
// the end of this file describes it as copying a block from src to dst and
// fixing the black level.
// NOTE(review): this extract omits several original lines (opening of the
// signature, the condition choosing between the two asm paths, most operand
// lists), so the comments below only describe what the visible lines do.
3094 int levelFix, int64_t *packedOffsetAndScale)
// Two complementary build guards: the first covers builds without MMX or
// without HAVE_6REGS, the second the inline-asm build.
3096 #if !TEMPLATE_PP_MMX || !HAVE_6REGS
3100 #if TEMPLATE_PP_MMX && HAVE_6REGS
// Scaled copy. REG_a initially holds packedOffsetAndScale (see the "=&a"/"0"
// constraints below): the quadword at offset 0 goes to mm2 and the one at
// offset 8 to mm3. REAL_SCALED_CPY subtracts mm2 and multiplies by mm3.
3102 "movq (%%"REG_a
"), %%mm2 \n\t"
3103 "movq 8(%%"REG_a
"), %%mm3 \n\t"
// REG_a = %2 + %4 and REG_d = %3 + %5.
// NOTE(review): presumably src + srcStride and dst + dstStride; the operand
// list is not fully visible here - confirm.
3104 "lea (%2,%4), %%"REG_a
" \n\t"
3105 "lea (%3,%5), %%"REG_d
" \n\t"
// mm4 = 0; used as the zero half when the non-MMXEXT variant unpacks bytes.
3106 "pxor %%mm4, %%mm4 \n\t"
3107 #if TEMPLATE_PP_MMXEXT
// MMXEXT variant: each source quadword is unpacked against itself (bytes
// become words), multiplied with pmulhuw by mm3, reduced by mm2, packed back
// to bytes with unsigned saturation and stored to dst1/dst2.
3108 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
3109 "movq " #src1 ", %%mm0 \n\t"\
3110 "movq " #src1 ", %%mm5 \n\t"\
3111 "movq " #src2 ", %%mm1 \n\t"\
3112 "movq " #src2 ", %%mm6 \n\t"\
3113 "punpcklbw %%mm0, %%mm0 \n\t"\
3114 "punpckhbw %%mm5, %%mm5 \n\t"\
3115 "punpcklbw %%mm1, %%mm1 \n\t"\
3116 "punpckhbw %%mm6, %%mm6 \n\t"\
3117 "pmulhuw %%mm3, %%mm0 \n\t"\
3118 "pmulhuw %%mm3, %%mm5 \n\t"\
3119 "pmulhuw %%mm3, %%mm1 \n\t"\
3120 "pmulhuw %%mm3, %%mm6 \n\t"\
3121 "psubw %%mm2, %%mm0 \n\t"\
3122 "psubw %%mm2, %%mm5 \n\t"\
3123 "psubw %%mm2, %%mm1 \n\t"\
3124 "psubw %%mm2, %%mm6 \n\t"\
3125 "packuswb %%mm5, %%mm0 \n\t"\
3126 "packuswb %%mm6, %%mm1 \n\t"\
3127 "movq %%mm0, " #dst1 " \n\t"\
3128 "movq %%mm1, " #dst2 " \n\t"\
3130 #else //TEMPLATE_PP_MMXEXT
// Plain MMX variant: bytes are zero-extended to words via mm4, mm2 is
// subtracted first, the result is shifted left by 6 and multiplied with the
// signed pmulhw by mm3, then packed with unsigned saturation and stored.
3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
3132 "movq " #src1 ", %%mm0 \n\t"\
3133 "movq " #src1 ", %%mm5 \n\t"\
3134 "punpcklbw %%mm4, %%mm0 \n\t"\
3135 "punpckhbw %%mm4, %%mm5 \n\t"\
3136 "psubw %%mm2, %%mm0 \n\t"\
3137 "psubw %%mm2, %%mm5 \n\t"\
3138 "movq " #src2 ", %%mm1 \n\t"\
3139 "psllw $6, %%mm0 \n\t"\
3140 "psllw $6, %%mm5 \n\t"\
3141 "pmulhw %%mm3, %%mm0 \n\t"\
3142 "movq " #src2 ", %%mm6 \n\t"\
3143 "pmulhw %%mm3, %%mm5 \n\t"\
3144 "punpcklbw %%mm4, %%mm1 \n\t"\
3145 "punpckhbw %%mm4, %%mm6 \n\t"\
3146 "psubw %%mm2, %%mm1 \n\t"\
3147 "psubw %%mm2, %%mm6 \n\t"\
3148 "psllw $6, %%mm1 \n\t"\
3149 "psllw $6, %%mm6 \n\t"\
3150 "pmulhw %%mm3, %%mm1 \n\t"\
3151 "pmulhw %%mm3, %%mm6 \n\t"\
3152 "packuswb %%mm5, %%mm0 \n\t"\
3153 "packuswb %%mm6, %%mm1 \n\t"\
3154 "movq %%mm0, " #dst1 " \n\t"\
3155 "movq %%mm1, " #dst2 " \n\t"\
3157 #endif //TEMPLATE_PP_MMXEXT
// Forwarding wrapper: arguments pass through one extra macro level before
// REAL_SCALED_CPY stringifies them with '#', so macros inside the arguments
// are expanded first (presumably needed for REGa - confirm).
3158 #define SCALED_CPY(src1, src2, dst1, dst2)\
3159 REAL_SCALED_CPY(src1, src2, dst1, dst2)
// Four invocations of two rows each. With REG_a = %2 + %4 the source operands
// address rows 0,1 / 2,3 / 4,5; after the two lea instructions advance
// REG_a and REG_d by four strides, the last invocation addresses rows 6,7.
// NOTE(review): assumes REGa/REGd name the same registers as REG_a/REG_d.
3161 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
3162 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
3163 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
3164 "lea (%%"REG_a
",%4,4), %%"REG_a
" \n\t"
3165 "lea (%%"REG_d
",%5,4), %%"REG_d
" \n\t"
3166 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
// Output: packedOffsetAndScale as an early-clobber operand in the 'a'
// register, tied to the matching input "0", because the asm overwrites REG_a.
3169 :
"=&a" (packedOffsetAndScale)
3170 :
"0" (packedOffsetAndScale),
3177 #else //TEMPLATE_PP_MMX && HAVE_6REGS
// Fallback without the asm: a memcpy whose destination is row i of dst
// (remaining arguments and the enclosing loop are not visible here).
3179 memcpy( &(dst[dstStride*i]),
3181 #endif //TEMPLATE_PP_MMX && HAVE_6REGS
// Second path: unscaled copy.
// NOTE(review): presumably taken when levelFix is zero; the selecting
// condition is not visible in this extract.
3183 #if TEMPLATE_PP_MMX && HAVE_6REGS
// REG_a = %0 + %2 and REG_d = %1 + %3, the same addressing scheme as the
// scaled path but with operand numbers shifted down by two.
3185 "lea (%0,%2), %%"REG_a
" \n\t"
3186 "lea (%1,%3), %%"REG_d
" \n\t"
// REAL_SIMPLE_CPY moves two 8-byte rows through mm0/mm1 without modifying
// them; SIMPLE_CPY is the forwarding wrapper, as with SCALED_CPY.
3188 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
3189 "movq " #src1 ", %%mm0 \n\t"\
3190 "movq " #src2 ", %%mm1 \n\t"\
3191 "movq %%mm0, " #dst1 " \n\t"\
3192 "movq %%mm1, " #dst2 " \n\t"\
3194 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
3195 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
// Same row pattern as the scaled path: rows 0-5, advance both pointers by
// four strides, then rows 6-7.
3197 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3198 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
3199 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
3200 "lea (%%"REG_a
",%2,4), %%"REG_a
" \n\t"
3201 "lea (%%"REG_d
",%3,4), %%"REG_d
" \n\t"
3202 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
// Clobber list: here REG_a and REG_d are scratch registers, not operands.
3208 :
"%"REG_a,
"%"REG_d
3210 #else //TEMPLATE_PP_MMX && HAVE_6REGS
// Fallback without the asm: row-wise memcpy into row i of dst.
3212 memcpy( &(dst[dstStride*i]),
3214 #endif //TEMPLATE_PP_MMX && HAVE_6REGS
// Inline-asm fragment: reads one quadword (8 bytes) from the address in %0
// into mm0 and writes that same quadword to several addresses formed from
// %0 and multiples of %1.
// NOTE(review): presumably the body of duplicate() from the listing at the end
// of this file; the embedded numbering skips a line between the first two
// stores and the operand list is not visible, so the exact set of rows written
// cannot be confirmed from here.
3225 "movq (%0), %%mm0 \n\t"
3226 "movq %%mm0, (%0, %1, 4) \n\t"
3228 "movq %%mm0, (%0) \n\t"
3229 "movq %%mm0, (%0, %1) \n\t"
3230 "movq %%mm0, (%0, %1, 2) \n\t"
3231 "movq %%mm0, (%0, %1, 4) \n\t"
// Start of the visible postProcess() body: filter-mode selection and per-call
// constants. NOTE(review): braces and some preprocessor lines (e.g. the #else
// between the two 'mode' declarations) are missing from this extract.
// 'mode' is fixed at build time when TEMPLATE_PP_TIME_MODE is defined; the
// other declaration selects the chroma or luma mode depending on isColor.
3253 #ifdef TEMPLATE_PP_TIME_MODE
3254 const int mode= TEMPLATE_PP_TIME_MODE;
3256 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
// Defaults: full 0..255 range and a QP correction factor of 256*256 (the
// factor is applied with '>>16' where the QP is rescaled later on).
3258 int black=0, white=255;
3259 int QPCorrecture= 256*256;
// Right-shifts used to index the QP tables (x>>qpHShift, y>>qpVShift):
// 4 for luma, reduced by the chroma subsampling amounts for chroma planes.
3266 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
3267 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
3270 uint64_t *
const yHistogram= c.yHistogram;
// For a non-positive stride the temp pointers are moved by -23*stride;
// tempDst additionally gets a fixed +32 byte offset.
// NOTE(review): presumably so that all rows addressed through a negative
// stride stay inside the buffer - confirm against the buffer allocation.
3271 uint8_t *
const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
3272 uint8_t *
const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32;
// Warning about visualization support (the guarding condition is not visible).
3277 av_log(
c2,
AV_LOG_WARNING,
"Visualization is currently only supported with the accurate deblock filter without SIMD\n");
// Fill 57 entries of the DC offset/threshold tables. Each value is computed
// as 0x7F minus a quantity derived from baseDcDiff, then multiplied by
// 0x0101010101010101 to repeat that byte in all eight byte positions
// (holds as long as the value fits in one byte).
3282 for(i=0; i<57; i++){
3283 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
3284 int threshold= offset*2 + 1;
3285 c.mmxDcOffset[i]= 0x7F -
offset;
3286 c.mmxDcThreshold[i]= 0x7F - threshold;
3287 c.mmxDcOffset[i]*= 0x0101010101010101LL;
3288 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
// Last visible arm of the copyAhead selection: 9 when dering is requested
// (the preceding arms are not visible here).
3302 else if(mode &
DERING) copyAhead=9;
// Brightness/contrast (level fix) setup derived from the luma histogram.
// NOTE(review): the enclosing conditions and several initialisations
// (e.g. of 'sum' and 'clipped') are not visible in this extract.
3310 uint64_t maxClipped;
// On frame 1, bin 0 is seeded with a value proportional to width*height.
3316 if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
// Total of all 256 histogram bins.
3318 for(i=0; i<256; i++){
3319 sum+= yHistogram[i];
// Amount of samples allowed to be clipped, as a fraction of the total.
3323 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
// Walk down from 255, removing bins from 'clipped' until it drops below
// maxClipped; the index reached becomes 'black'.
3326 for(black=255; black>0; black--){
3327 if(clipped < maxClipped)
break;
3328 clipped-= yHistogram[black];
// Same walk upward from 0 to find 'white'.
3332 for(white=0; white<256; white++){
3333 if(clipped < maxClipped)
break;
3334 clipped-= yHistogram[white];
// Ratio of the allowed output range to the measured input range.
// NOTE(review): divides by (white-black); whether white == black is excluded
// elsewhere cannot be seen from here - confirm.
3337 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
// MMXEXT build: scale stored as rounded scale*256 and the offset is
// pre-multiplied by that scale. The other visible assignments store
// scale*1024 and an unscaled offset. Both offsets are masked to 16 bits.
3339 #if TEMPLATE_PP_MMXEXT
3340 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
3341 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
3343 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
3344 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
// Replicate the 16-bit value into all four 16-bit lanes of the 64-bit field
// (copy into bits 32..47, then shift-or by 16).
3347 c.packedYOffset|= c.packedYOffset<<32;
3348 c.packedYOffset|= c.packedYOffset<<16;
3350 c.packedYScale|= c.packedYScale<<32;
3351 c.packedYScale|= c.packedYScale<<16;
// With LEVEL_FIX the QP correction factor becomes scale * 65536 (rounded);
// otherwise it stays at 65536.
3353 if(mode &
LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
3354 else QPCorrecture= 256*256;
// 0x0100 in each 16-bit lane and the default QP correction.
// NOTE(review): presumably the branch taken when no histogram-based level fix
// applies; the branch structure is not visible here.
3356 c.packedYScale= 0x0100010001000100LL;
3358 QPCorrecture= 256*256;
// Copy/deinterlace pass that fills the temporary destination before the main
// loop. NOTE(review): the enclosing loops over y and x, and the called
// function names, are not visible in this extract.
// srcBlock points at source row y; dstBlock starts one row into tempDst.
3364 const uint8_t *srcBlock= &(src[y*srcStride]);
3365 uint8_t *dstBlock= tempDst + dstStride;
// Prefetch hint (MMXEXT with HAVE_6REGS only). Computes
// row = ((%4 >> 2) & 6) + %5, then issues prefetchnta at
// srcBlock + row*srcStride + 32 and prefetcht0 at dstBlock + row*dstStride + 32,
// and repeats both for the following row.
// NOTE(review): operands %4 and %5 are not visible; presumably the column
// position and copyAhead - confirm.
3372 #if TEMPLATE_PP_MMXEXT && HAVE_6REGS
3381 "mov %4, %%"REG_a
" \n\t"
3382 "shr $2, %%"REG_a
" \n\t"
3383 "and $6, %%"REG_a
" \n\t"
3384 "add %5, %%"REG_a
" \n\t"
3385 "mov %%"REG_a
", %%"REG_d
" \n\t"
3386 "imul %1, %%"REG_a
" \n\t"
3387 "imul %3, %%"REG_d
" \n\t"
3388 "prefetchnta 32(%%"REG_a
", %0) \n\t"
3389 "prefetcht0 32(%%"REG_d
", %2) \n\t"
3390 "add %1, %%"REG_a
" \n\t"
3391 "add %3, %%"REG_d
" \n\t"
3392 "prefetchnta 32(%%"REG_a
", %0) \n\t"
3393 "prefetcht0 32(%%"REG_d
", %2) \n\t"
// No outputs; inputs %0..%3 are srcBlock, srcStride, dstBlock, dstStride.
3394 ::
"r" (srcBlock),
"r" ((
x86_reg)srcStride),
"r" (dstBlock),
"r" ((
x86_reg)dstStride),
// REG_a and REG_d are scratch registers and are declared as clobbered.
3396 :
"%"REG_a,
"%"REG_d
// Alternative branch for 3DNow builds (its content is not visible here).
3399 #elif TEMPLATE_PP_3DNOW
// Argument tail of a call that receives the source eight rows below srcBlock,
// the level-fix flag and the address of c.packedYOffset.
// NOTE(review): presumably blockCopy(); the callee name is not visible.
3409 srcBlock + srcStride*8, srcStride, mode &
LEVEL_FIX, &c.packedYOffset);
// Deinterlacer selection by mode flag (first arm and the bodies not visible).
3415 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3417 else if(mode & MEDIAN_DEINT_FILTER)
3419 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3423 else if(mode & LOWPASS5_DEINT_FILTER)
// Move copyAhead rows from tempDst (starting at row 9) to dst: one linecpy
// when the width equals |dstStride|, otherwise 'width' bytes per row.
3431 if(width==
FFABS(dstStride))
3432 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
3435 for(i=0; i<copyAhead; i++){
3436 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
// Main per-block-row processing. NOTE(review): the loop headers, braces, many
// conditions and most call names/bodies are missing from this extract; the
// comments describe the visible statements only.
// Row pointers for the current y in source and destination.
3443 const uint8_t *srcBlock= &(src[y*srcStride]);
3444 uint8_t *dstBlock= &(dst[y*dstStride]);
// Two scratch blocks inside c.tempBlocks, 8 bytes apart; swapped further down.
3446 uint8_t *tempBlock1= c.tempBlocks;
3447 uint8_t *tempBlock2= c.tempBlocks + 8;
// QP rows for this y; the non-B QP table is indexed with |QPStride|.
3449 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
3450 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*
FFABS(QPStride)];
// Copy the source rows that still exist (from row copyAhead on) into tempSrc,
// then fill the rows up to copyAhead+8 with copies of the last source line.
// NOTE(review): presumably only done near the bottom of the picture; the
// guarding condition is not visible.
3458 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
3459 FFMAX(height-y-copyAhead, 0), srcStride);
3462 for(i=
FFMAX(height-y, 8); i<copyAhead+8; i++)
3463 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1),
FFABS(srcStride));
// Same for the destination: copy the already written rows (starting one row
// above dstBlock) into tempDst and pad with the last destination line.
3466 linecpy(tempDst, dstBlock - dstStride,
FFMIN(height-y+1, copyAhead+1), dstStride);
3469 for(i=height-y+1; i<=copyAhead; i++)
3470 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1),
FFABS(dstStride));
// From here on the filters operate on the temporary destination.
3472 dstBlock= tempDst + dstStride;
3480 const int stride= dstStride;
// QP lookup for the current column. Two variants are visible: one indexes
// with qpHShift; the other indexes with a fixed >>4, rescales by QPCorrecture
// with rounding (+256*128, >>16) and also counts the sample at
// srcBlock[srcStride*12 + 4] in the histogram.
// NOTE(review): presumably chroma vs. luma; the selecting condition is not
// visible.
3485 QP= QPptr[x>>qpHShift];
3486 c.nonBQP= nonBQPptr[x>>qpHShift];
3489 QP= (QP* QPCorrecture + 256*128)>>16;
3490 c.nonBQP= nonBQPptr[x>>4];
3491 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
3492 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
// Load a 32-bit value into mm7 and apply packuswb three times, which spreads
// its low byte to all eight bytes of mm7; the result is stored to operand %0.
// NOTE(review): operands are not visible; presumably %1 is QP and %0 is
// c.pQPb - confirm.
3497 "movd %1, %%mm7 \n\t"
3498 "packuswb %%mm7, %%mm7 \n\t"
3499 "packuswb %%mm7, %%mm7 \n\t"
3500 "packuswb %%mm7, %%mm7 \n\t"
3501 "movq %%mm7, %0 \n\t"
// Prefetch hint (MMXEXT with HAVE_6REGS only): row = ((%4 >> 2) & 6) + %5,
// prefetchnta at srcBlock + row*srcStride + 32 and prefetcht0 at
// dstBlock + row*dstStride + 32, then the same for the next row.
// NOTE(review): operands %4 and %5 are not visible here.
3508 #if TEMPLATE_PP_MMXEXT && HAVE_6REGS
3517 "mov %4, %%"REG_a
" \n\t"
3518 "shr $2, %%"REG_a
" \n\t"
3519 "and $6, %%"REG_a
" \n\t"
3520 "add %5, %%"REG_a
" \n\t"
3521 "mov %%"REG_a
", %%"REG_d
" \n\t"
3522 "imul %1, %%"REG_a
" \n\t"
3523 "imul %3, %%"REG_d
" \n\t"
3524 "prefetchnta 32(%%"REG_a
", %0) \n\t"
3525 "prefetcht0 32(%%"REG_d
", %2) \n\t"
3526 "add %1, %%"REG_a
" \n\t"
3527 "add %3, %%"REG_d
" \n\t"
3528 "prefetchnta 32(%%"REG_a
", %0) \n\t"
3529 "prefetcht0 32(%%"REG_d
", %2) \n\t"
// No outputs; inputs %0..%3 are srcBlock, srcStride, dstBlock, dstStride.
3530 ::
"r" (srcBlock),
"r" ((
x86_reg)srcStride),
"r" (dstBlock),
"r" ((
x86_reg)dstStride),
// REG_a and REG_d are scratch registers and are declared as clobbered.
3532 :
"%"REG_a,
"%"REG_d
// Alternative branch for 3DNow builds (its content is not visible here).
3535 #elif TEMPLATE_PP_3DNOW
// Argument tail of a call that receives the source copyAhead rows below
// srcBlock, the level-fix flag and the address of c.packedYOffset.
// NOTE(review): presumably blockCopy(); the callee name is not visible.
3545 srcBlock + srcStride*copyAhead, srcStride, mode &
LEVEL_FIX, &c.packedYOffset);
// Deinterlacer selection by mode flag (first arm and the bodies not visible).
3549 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3551 else if(mode & MEDIAN_DEINT_FILTER)
3553 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3557 else if(mode & LOWPASS5_DEINT_FILTER)
// Vertical filter selection by mode flag (bodies not visible).
3565 if(mode & V_X1_FILTER)
3567 else if(mode & V_DEBLOCK){
// Horizontal filtering through transposition: the block is transposed into
// the scratch blocks, classified and deblocked there with a stride of 16,
// and transposed back to the four columns left of dstBlock.
3580 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3589 const int t=
RENAME(vertClassify)(tempBlock1, 16, &
c);
3596 RENAME(do_a_deblock)(tempBlock1, 16, 1, &
c,
mode);
3599 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
// Horizontal filter selection by mode flag (bodies not visible).
3602 if(mode & H_X1_FILTER)
3604 else if(mode & H_DEBLOCK){
3605 #if TEMPLATE_PP_ALTIVEC
3627 }
else if(mode & H_A_DEBLOCK){
3630 #endif //TEMPLATE_PP_MMX
// Argument tail of a call taking the per-plane blurred buffer at (x, y), the
// matching entry of tempBlurredPast and the noise limits.
// NOTE(review): presumably tempNoiseReducer(); the callee name is not visible.
3639 c.tempBlurred[isColor] + y*dstStride + x,
3640 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3641 c.ppMode.maxTmpNoise);
// Exchange the two scratch blocks for the next column.
3649 tmpXchg= tempBlock1;
3650 tempBlock1= tempBlock2;
3651 tempBlock2 = tmpXchg;
// Dering the block one row up and eight columns to the left; skipped for the
// first row (y == 0).
3656 if(y > 0)
RENAME(
dering)(dstBlock - dstStride - 8, dstStride, &
c);
// Second call tail with the same buffer arguments as above.
3661 c.tempBlurred[isColor] + y*dstStride + x,
3662 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3663 c.ppMode.maxTmpNoise);
// Copy the remaining height-y rows from tempDst (skipping its first row) back
// to dst: one linecpy when the width equals |dstStride|, otherwise 'width'
// bytes per row.
3668 uint8_t *dstBlock= &(dst[y*dstStride]);
3669 if(width==
FFABS(dstStride))
3670 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
3673 for(i=0; i<height-
y; i++){
3674 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
// End of processing: issue femms on 3DNow builds or emms on other MMX builds
// after the MMX code above has run.
3688 #if TEMPLATE_PP_3DNOW
3689 __asm__
volatile(
"femms");
3690 #elif TEMPLATE_PP_MMX
3691 __asm__
volatile(
"emms");
// Optional debug overlay, only built with DEBUG_BRIGHTNESS: draws the luma
// histogram into dst by adding 128 to the affected pixels.
3694 #ifdef DEBUG_BRIGHTNESS
// Largest histogram bin; used to normalise the bar lengths.
3698 for(i=0; i<256; i++)
3699 if(yHistogram[i] > max) max=yHistogram[i];
// For each bin i, mark row i from the previous bin's scaled value to this
// bin's scaled value (inclusive), stepping in whichever direction is needed.
3701 for(i=1; i<256; i++){
3703 int start=yHistogram[i-1]/(max/256+1);
3704 int end=yHistogram[i]/(max/256+1);
3705 int inc= end > start ? 1 : -1;
3706 for(x=start; x!=end+inc; x+=inc)
3707 dst[ i*dstStride + x]+=128;
// Dotted markers (every second pixel of the first 100 columns) on the rows
// indexed by 'white' and 'black'.
3710 for(i=0; i<100; i+=2){
3711 dst[ (white)*dstStride + i]+=128;
3712 dst[ (black)*dstStride + i]+=128;
3722 #undef TEMPLATE_PP_C
3723 #undef TEMPLATE_PP_ALTIVEC
3724 #undef TEMPLATE_PP_MMX
3725 #undef TEMPLATE_PP_MMXEXT
3726 #undef TEMPLATE_PP_3DNOW
3727 #undef TEMPLATE_PP_SSE2
static void RENAME() deInterlaceL5(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
Deinterlace the given block by filtering every line with a (-1 2 6 2 -1) filter.
static int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
#define AV_LOG_WARNING
Something somehow does not look correct.
#define DECLARE_ALIGNED(n, t, v)
static void RENAME() duplicate(uint8_t src[], int stride)
Duplicate the given 8 src pixels ? times upward.
static void RENAME() doVertLowPass(uint8_t *src, int stride, PPContext *c)
Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) using the...
static void RENAME() deInterlaceFF(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering every second line with a (-1 4 2 4 -1) filter.
static void transpose_16x8_char_toPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define LINEAR_BLEND_DEINT_FILTER
static av_cold int end(AVCodecContext *avctx)
static void transpose_8x16_char_fromPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
static void RENAME() deInterlaceBlendLinear(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering all lines with a (1 2 1) filter.
#define LOWPASS5_DEINT_FILTER
static void horizX1Filter(uint8_t *src, int stride, int QP)
Experimental Filter 1 (Horizontal) will not damage linear gradients Flat blocks should look like they...
static void RENAME() deInterlaceMedian(uint8_t src[], int stride)
Deinterlace the given block by applying a median filter to every second line.
static void linecpy(void *dest, const void *src, int lines, int stride)
#define MEDIAN_DEINT_FILTER
#define CUBIC_IPOL_DEINT_FILTER
static void RENAME() tempNoiseReducer(uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, const int *maxNoise)
static void RENAME() dering(uint8_t src[], int stride, PPContext *c)
static const uint8_t offset[127][2]
static void RENAME() deInterlaceInterpolateLinear(uint8_t src[], int stride)
Deinterlace the given block by linearly interpolating every second line.
#define LINEAR_IPOL_DEINT_FILTER
static void RENAME() blockCopy(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale)
Copy a block from src to dst and fix the black level.
#define XMM_CLOBBERS(...)
#define FFMPEG_DEINT_FILTER
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c)
static void RENAME() deInterlaceInterpolateCubic(uint8_t src[], int stride)
Deinterlace the given block by cubic interpolating every second line.
#define AV_LOG_INFO
Standard information.
static void RENAME() postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c)
Filter an array of bytes (Y, U or V values).
static void RENAME() vertX1Filter(uint8_t *src, int stride, PPContext *co)
Experimental Filter 1 will not damage linear gradients Flat blocks should look like they were passed ...
BYTE int const BYTE int int int height
GLint GLenum GLboolean GLsizei stride
static void RENAME() doVertDefFilter(uint8_t src[], int stride, PPContext *c)
static void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define TEMP_NOISE_FILTER
#define LEVEL_FIX
Brightness & Contrast.
#define NAMED_CONSTRAINTS_ADD(...)