- Bug fix on MV prediction for MPEG4 caused by new H.263 GOB code.
[libav.git] / postproc / postprocess_template.c
CommitLineData
3057fa66
A
1/*
2 Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18
19/*
3b58b885 20 C MMX MMX2 3DNow
3057fa66
A
21isVertDC Ec Ec
22isVertMinMaxOk Ec Ec
3b58b885 23doVertLowPass E e e
3057fa66
A
24doVertDefFilter Ec Ec Ec
25isHorizDC Ec Ec
4e4dcbc5
MN
26isHorizMinMaxOk a E
27doHorizLowPass E e e
e5c30e06 28doHorizDefFilter Ec Ec Ec
2e212618 29deRing E e e*
3b58b885 30Vertical RKAlgo1 E a a
e5c30e06 31Horizontal RKAlgo1 a a
3b58b885
MN
32Vertical X1 a E E
33Horizontal X1 a E E
acced553
MN
34LinIpolDeinterlace e E E*
35CubicIpolDeinterlace a e e*
36LinBlendDeinterlace e E E*
a6be8111 37MedianDeinterlace Ec Ec
d5a1a995 38
3057fa66 39
13e00528 40* i dont have a 3dnow CPU -> its untested
3057fa66 41E = Exact implementation
acced553 42e = allmost exact implementation (slightly different rounding,...)
3057fa66
A
43a = alternative / approximate impl
44c = checked against the other implementations (-vo md5)
45*/
46
47/*
48TODO:
9f45d04d 49verify that everything workes as it should (how?)
3057fa66
A
50reduce the time wasted on the mem transfer
51implement dering
13e00528 52implement everything in C at least (done at the moment but ...)
3057fa66
A
53unroll stuff if instructions depend too much on the prior one
54we use 8x8 blocks for the horizontal filters, opendivx seems to use 8x4?
55move YScale thing to the end instead of fixing QP
13e00528 56write a faster and higher quality deblocking filter :)
d5a1a995
MN
57do something about the speed of the horizontal filters
58make the mainloop more flexible (variable number of blocks at once
59 (the if/else stuff per block is slowing things down)
9f45d04d 60compare the quality & speed of all filters
9f45d04d 61split this huge file
3b58b885 62fix warnings (unused vars, ...)
a6be8111 63noise reduction filters
e5c30e06 64border remover
8405b3fd 65optimize c versions
3057fa66
A
66...
67
68Notes:
13e00528
A
69*/
70
a6be8111 71//Changelog: use the CVS log
3057fa66 72
6c426cff 73#include "../config.h"
3057fa66
A
74#include <inttypes.h>
75#include <stdio.h>
d5a1a995 76#include <stdlib.h>
911879d1 77#include <string.h>
dda87e9f
PL
78#ifdef HAVE_MALLOC_H
79#include <malloc.h>
80#endif
3057fa66 81//#undef HAVE_MMX2
13e00528 82//#define HAVE_3DNOW
3057fa66 83//#undef HAVE_MMX
13e00528 84#include "postprocess.h"
3057fa66 85
e939e1c3
A
/* Simple scalar helper macros.
 * NOTE: each argument may be evaluated more than once — never pass
 * expressions with side effects (i++ etc.).
 * SIGN(0) deliberately returns -1 (callers rely on a two-valued result). */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define SIGN(a) ((a) > 0 ? 1 : -1)
90
/* PAVGB(a,b): emit an inline-asm string that averages packed unsigned
 * bytes (b = (a+b+1)>>1). MMX2 provides pavgb natively; 3DNow! uses the
 * equivalent pavgusb. Undefined when neither extension is available. */
#ifdef HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
3057fa66 96
2e212618
MN
/* Byte-wise unsigned min/max as inline-asm strings.
 * MMX2 has native pminub/pmaxub; plain MMX emulates them with saturated
 * arithmetic (the MMX PMINUB variant needs a scratch register t and
 * takes its operands in swapped order). */
#ifdef HAVE_MMX2
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
#elif defined (HAVE_MMX)
#define PMINUB(b,a,t) \
	"movq " #a ", " #t " \n\t"\
	"psubusb " #b ", " #t " \n\t"\
	"psubb " #t ", " #a " \n\t"
#endif

#ifdef HAVE_MMX2
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
#elif defined (HAVE_MMX)
#define PMAXUB(a,b) \
	"psubusb " #a ", " #b " \n\t"\
	"paddb " #a ", " #b " \n\t"
#endif
113
114
911879d1
MN
/* Scratch-buffer size used while parsing the mode string, and the
 * maximum number of per-filter options. */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
117
/* Packed 64-bit constants used by the MMX code paths. They must be
 * 8-byte aligned because the asm loads them with movq. packedYOffset /
 * packedYScale are written at runtime (volatile) by the level-fix code;
 * pQPb/pQPb2 hold the current QP broadcast to all 8 bytes. */
#ifdef HAVE_MMX
static volatile uint64_t __attribute__((aligned(8))) packedYOffset=	0x0000000000000000LL;
static volatile uint64_t __attribute__((aligned(8))) packedYScale=	0x0100010001000100LL;
static uint64_t __attribute__((aligned(8))) w05=	0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20=	0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) w1400=	0x1400140014001400LL;
static uint64_t __attribute__((aligned(8))) bm00000001=	0x00000000000000FFLL;
static uint64_t __attribute__((aligned(8))) bm00010000=	0x000000FF00000000LL;
static uint64_t __attribute__((aligned(8))) bm00001000=	0x00000000FF000000LL;
static uint64_t __attribute__((aligned(8))) bm10000000=	0xFF00000000000000LL;
static uint64_t __attribute__((aligned(8))) bm10000001=	0xFF000000000000FFLL;
static uint64_t __attribute__((aligned(8))) bm11000011=	0xFFFF00000000FFFFLL;
static uint64_t __attribute__((aligned(8))) bm00000011=	0x000000000000FFFFLL;
static uint64_t __attribute__((aligned(8))) bm11111110=	0xFFFFFFFFFFFFFF00LL;
static uint64_t __attribute__((aligned(8))) bm11000000=	0xFFFF000000000000LL;
static uint64_t __attribute__((aligned(8))) bm00011000=	0x000000FFFF000000LL;
static uint64_t __attribute__((aligned(8))) bm00110011=	0x0000FFFF0000FFFFLL;
static uint64_t __attribute__((aligned(8))) bm11001100=	0xFFFF0000FFFF0000LL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL;
static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL;
static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL;
static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL;
static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL;
static uint64_t __attribute__((aligned(8))) temp0=0;
static uint64_t __attribute__((aligned(8))) temp1=0;
static uint64_t __attribute__((aligned(8))) temp2=0;
static uint64_t __attribute__((aligned(8))) temp3=0;
static uint64_t __attribute__((aligned(8))) temp4=0;
static uint64_t __attribute__((aligned(8))) temp5=0;
static uint64_t __attribute__((aligned(8))) pQPb=0;
static uint64_t __attribute__((aligned(8))) pQPb2=0;
static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code
#else
static uint64_t packedYOffset=	0x0000000000000000LL;
static uint64_t packedYScale=	0x0100010001000100LL;
static uint8_t tempBlocks[8*16*2]; //used for the horizontal code
#endif

/* A block counts as "flat" when more than this many of its 64 vertical /
 * horizontal neighbor pairs differ by at most 1 (see isVertDC). */
int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;

//amount of "black" you are willing to lose to get a brightness corrected picture
double maxClippedThreshold= 0.01;

/* Luma clamp range for the auto-level (brightness correction) filter. */
int maxAllowedY=234;
int minAllowedY=16;
3057fa66 171
911879d1
MN
172static struct PPFilter filters[]=
173{
174 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
175 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
176 {"vr", "rkvdeblock", 1, 2, 4, H_RK1_FILTER},
177 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
178 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
179 {"dr", "dering", 1, 5, 6, DERING},
180 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
181 {"lb", "linblenddeint", 0, 1, 6, LINEAR_BLEND_DEINT_FILTER},
182 {"li", "linipoldeint", 0, 1, 6, LINEAR_IPOL_DEINT_FILTER},
183 {"ci", "cubicipoldeint", 0, 1, 6, CUBIC_IPOL_DEINT_FILTER},
184 {"md", "mediandeint", 0, 1, 6, MEDIAN_DEINT_FILTER},
185 {NULL, NULL,0,0,0,0} //End Marker
186};
187
/* Alias table for the mode-string parser: pairs of (alias, expansion),
 * terminated by NULL. "de"/"default" and "fa"/"fast" are shorthands. */
static char *replaceTable[]=
{
	"default", "hdeblock:a,vdeblock:a,dering:a,autolevels",
	"de", "hdeblock:a,vdeblock:a,dering:a,autolevels",
	"fast", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels",
	"fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels",
	NULL //End Marker
};
196
#ifdef HAVE_MMX
/* Dummy use of every MMX constant above so the compiler does not emit
 * "defined but not used" warnings for asm-only variables; the condition
 * can never remove the constants because they are referenced here. */
static inline void unusedVariableWarningFixer(void)
{
if(
 packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000
 + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110
 + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F
 + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4
 + temp5 + pQPb== 0) b00=0;
}
#endif
e5c30e06 208
#ifdef TIMING
/* Read the x86 time-stamp counter ("=A" returns edx:eax as one 64-bit
 * value). Only compiled for internal benchmarking builds. */
static inline long long rdtsc(void)
{
	long long l;
	asm volatile(	"rdtsc\n\t"
		: "=A" (l)
	);
	return l;
}
#endif
3057fa66 220
#ifdef HAVE_MMX2
/* Thin wrappers around the SSE prefetch hint instructions; they only
 * warm the cache and have no architectural side effects. */
static inline void prefetchnta(void *p)
{
	asm volatile(	"prefetchnta (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht0(void *p)
{
	asm volatile(	"prefetcht0 (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht1(void *p)
{
	asm volatile(	"prefetcht1 (%0)\n\t"
		: : "r" (p)
	);
}

static inline void prefetcht2(void *p)
{
	asm volatile(	"prefetcht2 (%0)\n\t"
		: : "r" (p)
	);
}
#endif
3057fa66
A
250
251//FIXME? |255-0| = 1 (shouldnt be a problem ...)
252/**
acced553 253 * Check if the middle 8x8 Block in the given 8x16 block is flat
3057fa66 254 */
d5a1a995 255static inline int isVertDC(uint8_t src[], int stride){
3057fa66 256 int numEq= 0;
e5c30e06 257#ifndef HAVE_MMX
d5a1a995 258 int y;
e5c30e06 259#endif
acced553 260 src+= stride*4; // src points to begin of the 8x8 Block
3057fa66 261#ifdef HAVE_MMX
37da00fc
MN
262asm volatile(
263 "leal (%1, %2), %%eax \n\t"
264 "leal (%%eax, %2, 4), %%ebx \n\t"
265// 0 1 2 3 4 5 6 7 8 9
266// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
3057fa66
A
267 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F
268 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D
269 "movq (%1), %%mm0 \n\t"
37da00fc 270 "movq (%%eax), %%mm1 \n\t"
3057fa66
A
271 "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
272 "paddb %%mm7, %%mm0 \n\t"
273 "pcmpgtb %%mm6, %%mm0 \n\t"
274
37da00fc 275 "movq (%%eax,%2), %%mm2 \n\t"
3057fa66
A
276 "psubb %%mm2, %%mm1 \n\t"
277 "paddb %%mm7, %%mm1 \n\t"
278 "pcmpgtb %%mm6, %%mm1 \n\t"
279 "paddb %%mm1, %%mm0 \n\t"
280
37da00fc 281 "movq (%%eax, %2, 2), %%mm1 \n\t"
3057fa66
A
282 "psubb %%mm1, %%mm2 \n\t"
283 "paddb %%mm7, %%mm2 \n\t"
284 "pcmpgtb %%mm6, %%mm2 \n\t"
285 "paddb %%mm2, %%mm0 \n\t"
286
37da00fc 287 "movq (%1, %2, 4), %%mm2 \n\t"
3057fa66
A
288 "psubb %%mm2, %%mm1 \n\t"
289 "paddb %%mm7, %%mm1 \n\t"
290 "pcmpgtb %%mm6, %%mm1 \n\t"
291 "paddb %%mm1, %%mm0 \n\t"
292
37da00fc 293 "movq (%%ebx), %%mm1 \n\t"
3057fa66
A
294 "psubb %%mm1, %%mm2 \n\t"
295 "paddb %%mm7, %%mm2 \n\t"
296 "pcmpgtb %%mm6, %%mm2 \n\t"
297 "paddb %%mm2, %%mm0 \n\t"
298
37da00fc 299 "movq (%%ebx, %2), %%mm2 \n\t"
3057fa66
A
300 "psubb %%mm2, %%mm1 \n\t"
301 "paddb %%mm7, %%mm1 \n\t"
302 "pcmpgtb %%mm6, %%mm1 \n\t"
303 "paddb %%mm1, %%mm0 \n\t"
304
37da00fc 305 "movq (%%ebx, %2, 2), %%mm1 \n\t"
3057fa66
A
306 "psubb %%mm1, %%mm2 \n\t"
307 "paddb %%mm7, %%mm2 \n\t"
308 "pcmpgtb %%mm6, %%mm2 \n\t"
309 "paddb %%mm2, %%mm0 \n\t"
310
311 " \n\t"
312 "movq %%mm0, %%mm1 \n\t"
313 "psrlw $8, %%mm0 \n\t"
314 "paddb %%mm1, %%mm0 \n\t"
e5c30e06
MN
315#ifdef HAVE_MMX2
316 "pshufw $0xF9, %%mm0, %%mm1 \n\t"
317 "paddb %%mm1, %%mm0 \n\t"
318 "pshufw $0xFE, %%mm0, %%mm1 \n\t"
319#else
3057fa66
A
320 "movq %%mm0, %%mm1 \n\t"
321 "psrlq $16, %%mm0 \n\t"
322 "paddb %%mm1, %%mm0 \n\t"
323 "movq %%mm0, %%mm1 \n\t"
324 "psrlq $32, %%mm0 \n\t"
e5c30e06 325#endif
3057fa66 326 "paddb %%mm1, %%mm0 \n\t"
3057fa66
A
327 "movd %%mm0, %0 \n\t"
328 : "=r" (numEq)
329 : "r" (src), "r" (stride)
4e4dcbc5 330 : "%eax", "%ebx"
3057fa66 331 );
3057fa66 332
37da00fc 333 numEq= (256 - numEq) &0xFF;
3057fa66
A
334
335#else
d5a1a995 336 for(y=0; y<BLOCK_SIZE-1; y++)
3057fa66
A
337 {
338 if(((src[0] - src[0+stride] + 1)&0xFFFF) < 3) numEq++;
339 if(((src[1] - src[1+stride] + 1)&0xFFFF) < 3) numEq++;
340 if(((src[2] - src[2+stride] + 1)&0xFFFF) < 3) numEq++;
341 if(((src[3] - src[3+stride] + 1)&0xFFFF) < 3) numEq++;
342 if(((src[4] - src[4+stride] + 1)&0xFFFF) < 3) numEq++;
343 if(((src[5] - src[5+stride] + 1)&0xFFFF) < 3) numEq++;
344 if(((src[6] - src[6+stride] + 1)&0xFFFF) < 3) numEq++;
345 if(((src[7] - src[7+stride] + 1)&0xFFFF) < 3) numEq++;
346 src+= stride;
347 }
348#endif
349/* if(abs(numEq - asmEq) > 0)
350 {
351 printf("\nasm:%d c:%d\n", asmEq, numEq);
352 for(int y=0; y<8; y++)
353 {
354 for(int x=0; x<8; x++)
355 {
356 printf("%d ", temp[x + y*stride]);
357 }
358 printf("\n");
359 }
360 }
361*/
d5a1a995
MN
362// for(int i=0; i<numEq/8; i++) src[i]=255;
363 return (numEq > vFlatnessThreshold) ? 1 : 0;
3057fa66
A
364}
365
d5a1a995 366static inline int isVertMinMaxOk(uint8_t src[], int stride, int QP)
3057fa66
A
367{
368#ifdef HAVE_MMX
369 int isOk;
acced553 370 src+= stride*3;
3057fa66
A
371 asm volatile(
372// "int $3 \n\t"
373 "movq (%1, %2), %%mm0 \n\t"
374 "movq (%1, %2, 8), %%mm1 \n\t"
375 "movq %%mm0, %%mm2 \n\t"
376 "psubusb %%mm1, %%mm0 \n\t"
377 "psubusb %%mm2, %%mm1 \n\t"
378 "por %%mm1, %%mm0 \n\t" // ABS Diff
379
380 "movq pQPb, %%mm7 \n\t" // QP,..., QP
381 "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
382 "psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
383 "pcmpeqd b00, %%mm0 \n\t"
384 "psrlq $16, %%mm0 \n\t"
385 "pcmpeqd bFF, %%mm0 \n\t"
386// "movd %%mm0, (%1, %2, 4)\n\t"
387 "movd %%mm0, %0 \n\t"
388 : "=r" (isOk)
389 : "r" (src), "r" (stride)
390 );
ac0b0b2f 391 return isOk;
3057fa66
A
392#else
393
d5a1a995
MN
394 int isOk2= 1;
395 int x;
acced553 396 src+= stride*3;
d5a1a995 397 for(x=0; x<BLOCK_SIZE; x++)
3057fa66 398 {
d5a1a995 399 if(abs((int)src[x + stride] - (int)src[x + (stride<<3)]) > 2*QP) isOk2=0;
3057fa66
A
400 }
401/* if(isOk && !isOk2 || !isOk && isOk2)
402 {
403 printf("\nasm:%d c:%d QP:%d\n", isOk, isOk2, QP);
404 for(int y=0; y<9; y++)
405 {
406 for(int x=0; x<8; x++)
407 {
408 printf("%d ", src[x + y*stride]);
409 }
410 printf("\n");
411 }
412 } */
413
414 return isOk2;
415#endif
416
417}
418
419/**
acced553 420 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
a6be8111 421 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
3057fa66
A
422 */
423static inline void doVertLowPass(uint8_t *src, int stride, int QP)
424{
13e00528 425#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
acced553 426 src+= stride*3;
3057fa66 427 asm volatile( //"movv %0 %1 %2\n\t"
3057fa66 428 "movq pQPb, %%mm0 \n\t" // QP,..., QP
3057fa66
A
429
430 "movq (%0), %%mm6 \n\t"
431 "movq (%0, %1), %%mm5 \n\t"
432 "movq %%mm5, %%mm1 \n\t"
433 "movq %%mm6, %%mm2 \n\t"
434 "psubusb %%mm6, %%mm5 \n\t"
435 "psubusb %%mm1, %%mm2 \n\t"
436 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
437 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
438 "pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
439
440 "pand %%mm2, %%mm6 \n\t"
441 "pandn %%mm1, %%mm2 \n\t"
442 "por %%mm2, %%mm6 \n\t"// First Line to Filter
443
444 "movq (%0, %1, 8), %%mm5 \n\t"
445 "leal (%0, %1, 4), %%eax \n\t"
446 "leal (%0, %1, 8), %%ebx \n\t"
447 "subl %1, %%ebx \n\t"
448 "addl %1, %0 \n\t" // %0 points to line 1 not 0
449 "movq (%0, %1, 8), %%mm7 \n\t"
450 "movq %%mm5, %%mm1 \n\t"
451 "movq %%mm7, %%mm2 \n\t"
452 "psubusb %%mm7, %%mm5 \n\t"
453 "psubusb %%mm1, %%mm2 \n\t"
454 "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
455 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
456 "pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF
457
458 "pand %%mm2, %%mm7 \n\t"
459 "pandn %%mm1, %%mm2 \n\t"
460 "por %%mm2, %%mm7 \n\t" // First Line to Filter
461
462
463 // 1 2 3 4 5 6 7 8
464 // %0 %0+%1 %0+2%1 eax %0+4%1 eax+2%1 ebx eax+4%1
465 // 6 4 2 2 1 1
466 // 6 4 4 2
467 // 6 8 2
acced553 468
3057fa66
A
469 "movq (%0, %1), %%mm0 \n\t" // 1
470 "movq %%mm0, %%mm1 \n\t" // 1
13e00528
A
471 PAVGB(%%mm6, %%mm0) //1 1 /2
472 PAVGB(%%mm6, %%mm0) //3 1 /4
3057fa66
A
473
474 "movq (%0, %1, 4), %%mm2 \n\t" // 1
475 "movq %%mm2, %%mm5 \n\t" // 1
13e00528
A
476 PAVGB((%%eax), %%mm2) // 11 /2
477 PAVGB((%0, %1, 2), %%mm2) // 211 /4
3057fa66
A
478 "movq %%mm2, %%mm3 \n\t" // 211 /4
479 "movq (%0), %%mm4 \n\t" // 1
13e00528
A
480 PAVGB(%%mm4, %%mm3) // 4 211 /8
481 PAVGB(%%mm0, %%mm3) //642211 /16
3057fa66
A
482 "movq %%mm3, (%0) \n\t" // X
483 // mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
484 "movq %%mm1, %%mm0 \n\t" // 1
13e00528 485 PAVGB(%%mm6, %%mm0) //1 1 /2
3057fa66 486 "movq %%mm4, %%mm3 \n\t" // 1
13e00528
A
487 PAVGB((%0,%1,2), %%mm3) // 1 1 /2
488 PAVGB((%%eax,%1,2), %%mm5) // 11 /2
489 PAVGB((%%eax), %%mm5) // 211 /4
490 PAVGB(%%mm5, %%mm3) // 2 2211 /8
491 PAVGB(%%mm0, %%mm3) //4242211 /16
3057fa66
A
492 "movq %%mm3, (%0,%1) \n\t" // X
493 // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
13e00528 494 PAVGB(%%mm4, %%mm6) //11 /2
3057fa66 495 "movq (%%ebx), %%mm0 \n\t" // 1
13e00528 496 PAVGB((%%eax, %1, 2), %%mm0) // 11/2
3057fa66 497 "movq %%mm0, %%mm3 \n\t" // 11/2
13e00528
A
498 PAVGB(%%mm1, %%mm0) // 2 11/4
499 PAVGB(%%mm6, %%mm0) //222 11/8
500 PAVGB(%%mm2, %%mm0) //22242211/16
3057fa66
A
501 "movq (%0, %1, 2), %%mm2 \n\t" // 1
502 "movq %%mm0, (%0, %1, 2) \n\t" // X
503 // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
504 "movq (%%eax, %1, 4), %%mm0 \n\t" // 1
13e00528
A
505 PAVGB((%%ebx), %%mm0) // 11 /2
506 PAVGB(%%mm0, %%mm6) //11 11 /4
507 PAVGB(%%mm1, %%mm4) // 11 /2
508 PAVGB(%%mm2, %%mm1) // 11 /2
509 PAVGB(%%mm1, %%mm6) //1122 11 /8
510 PAVGB(%%mm5, %%mm6) //112242211 /16
3057fa66
A
511 "movq (%%eax), %%mm5 \n\t" // 1
512 "movq %%mm6, (%%eax) \n\t" // X
513 // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
514 "movq (%%eax, %1, 4), %%mm6 \n\t" // 1
13e00528
A
515 PAVGB(%%mm7, %%mm6) // 11 /2
516 PAVGB(%%mm4, %%mm6) // 11 11 /4
517 PAVGB(%%mm3, %%mm6) // 11 2211 /8
518 PAVGB(%%mm5, %%mm2) // 11 /2
3057fa66 519 "movq (%0, %1, 4), %%mm4 \n\t" // 1
13e00528
A
520 PAVGB(%%mm4, %%mm2) // 112 /4
521 PAVGB(%%mm2, %%mm6) // 112242211 /16
3057fa66
A
522 "movq %%mm6, (%0, %1, 4) \n\t" // X
523 // mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
13e00528
A
524 PAVGB(%%mm7, %%mm1) // 11 2 /4
525 PAVGB(%%mm4, %%mm5) // 11 /2
526 PAVGB(%%mm5, %%mm0) // 11 11 /4
3057fa66 527 "movq (%%eax, %1, 2), %%mm6 \n\t" // 1
13e00528
A
528 PAVGB(%%mm6, %%mm1) // 11 4 2 /8
529 PAVGB(%%mm0, %%mm1) // 11224222 /16
3057fa66
A
530 "movq %%mm1, (%%eax, %1, 2) \n\t" // X
531 // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
13e00528 532 PAVGB((%%ebx), %%mm2) // 112 4 /8
3057fa66 533 "movq (%%eax, %1, 4), %%mm0 \n\t" // 1
13e00528
A
534 PAVGB(%%mm0, %%mm6) // 1 1 /2
535 PAVGB(%%mm7, %%mm6) // 1 12 /4
536 PAVGB(%%mm2, %%mm6) // 1122424 /4
3057fa66
A
537 "movq %%mm6, (%%ebx) \n\t" // X
538 // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
13e00528
A
539 PAVGB(%%mm7, %%mm5) // 11 2 /4
540 PAVGB(%%mm7, %%mm5) // 11 6 /8
3057fa66 541
13e00528
A
542 PAVGB(%%mm3, %%mm0) // 112 /4
543 PAVGB(%%mm0, %%mm5) // 112246 /16
3057fa66 544 "movq %%mm5, (%%eax, %1, 4) \n\t" // X
8405b3fd 545 "subl %1, %0 \n\t"
3057fa66
A
546
547 :
548 : "r" (src), "r" (stride)
549 : "%eax", "%ebx"
550 );
3057fa66
A
551#else
552 const int l1= stride;
553 const int l2= stride + l1;
554 const int l3= stride + l2;
555 const int l4= stride + l3;
556 const int l5= stride + l4;
557 const int l6= stride + l5;
558 const int l7= stride + l6;
559 const int l8= stride + l7;
560 const int l9= stride + l8;
d5a1a995 561 int x;
acced553 562 src+= stride*3;
d5a1a995 563 for(x=0; x<BLOCK_SIZE; x++)
3057fa66
A
564 {
565 const int first= ABS(src[0] - src[l1]) < QP ? src[0] : src[l1];
566 const int last= ABS(src[l8] - src[l9]) < QP ? src[l9] : src[l8];
567
568 int sums[9];
569 sums[0] = first + src[l1];
570 sums[1] = src[l1] + src[l2];
571 sums[2] = src[l2] + src[l3];
572 sums[3] = src[l3] + src[l4];
573 sums[4] = src[l4] + src[l5];
574 sums[5] = src[l5] + src[l6];
575 sums[6] = src[l6] + src[l7];
576 sums[7] = src[l7] + src[l8];
577 sums[8] = src[l8] + last;
578
579 src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
e5c30e06
MN
580 src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
581 src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
582 src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
583 src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
584 src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
585 src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4;
586 src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
3057fa66
A
587
588 src++;
589 }
590
591#endif
592}
593
13e00528
A
594/**
595 * Experimental implementation of the filter (Algorithm 1) described in a paper from Ramkishor & Karandikar
596 * values are correctly clipped (MMX2)
597 * values are wraparound (C)
598 * conclusion: its fast, but introduces ugly horizontal patterns if there is a continious gradient
599 0 8 16 24
600 x = 8
601 x/2 = 4
602 x/8 = 1
603 1 12 12 23
604 */
9f45d04d 605static inline void vertRK1Filter(uint8_t *src, int stride, int QP)
13e00528 606{
d5a1a995 607#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
acced553 608 src+= stride*3;
13e00528
A
609// FIXME rounding
610 asm volatile(
611 "pxor %%mm7, %%mm7 \n\t" // 0
612 "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
613 "leal (%0, %1), %%eax \n\t"
614 "leal (%%eax, %1, 4), %%ebx \n\t"
615// 0 1 2 3 4 5 6 7 8 9
616// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
617 "movq pQPb, %%mm0 \n\t" // QP,..., QP
618 "movq %%mm0, %%mm1 \n\t" // QP,..., QP
619 "paddusb b02, %%mm0 \n\t"
620 "psrlw $2, %%mm0 \n\t"
621 "pand b3F, %%mm0 \n\t" // QP/4,..., QP/4
622 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
623 "movq (%0, %1, 4), %%mm2 \n\t" // line 4
624 "movq (%%ebx), %%mm3 \n\t" // line 5
625 "movq %%mm2, %%mm4 \n\t" // line 4
626 "pcmpeqb %%mm5, %%mm5 \n\t" // -1
627 "pxor %%mm2, %%mm5 \n\t" // -line 4 - 1
d5a1a995 628 PAVGB(%%mm3, %%mm5)
13e00528
A
629 "paddb %%mm6, %%mm5 \n\t" // (l5-l4)/2
630 "psubusb %%mm3, %%mm4 \n\t"
631 "psubusb %%mm2, %%mm3 \n\t"
632 "por %%mm3, %%mm4 \n\t" // |l4 - l5|
633 "psubusb %%mm0, %%mm4 \n\t"
634 "pcmpeqb %%mm7, %%mm4 \n\t"
635 "pand %%mm4, %%mm5 \n\t" // d/2
636
637// "paddb %%mm6, %%mm2 \n\t" // line 4 + 0x80
638 "paddb %%mm5, %%mm2 \n\t"
639// "psubb %%mm6, %%mm2 \n\t"
640 "movq %%mm2, (%0,%1, 4) \n\t"
641
642 "movq (%%ebx), %%mm2 \n\t"
643// "paddb %%mm6, %%mm2 \n\t" // line 5 + 0x80
644 "psubb %%mm5, %%mm2 \n\t"
645// "psubb %%mm6, %%mm2 \n\t"
646 "movq %%mm2, (%%ebx) \n\t"
647
648 "paddb %%mm6, %%mm5 \n\t"
649 "psrlw $2, %%mm5 \n\t"
650 "pand b3F, %%mm5 \n\t"
651 "psubb b20, %%mm5 \n\t" // (l5-l4)/8
652
653 "movq (%%eax, %1, 2), %%mm2 \n\t"
654 "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80
655 "paddsb %%mm5, %%mm2 \n\t"
656 "psubb %%mm6, %%mm2 \n\t"
657 "movq %%mm2, (%%eax, %1, 2) \n\t"
658
659 "movq (%%ebx, %1), %%mm2 \n\t"
660 "paddb %%mm6, %%mm2 \n\t" // line 6 + 0x80
661 "psubsb %%mm5, %%mm2 \n\t"
662 "psubb %%mm6, %%mm2 \n\t"
663 "movq %%mm2, (%%ebx, %1) \n\t"
664
665 :
666 : "r" (src), "r" (stride)
667 : "%eax", "%ebx"
668 );
669#else
670 const int l1= stride;
671 const int l2= stride + l1;
672 const int l3= stride + l2;
673 const int l4= stride + l3;
674 const int l5= stride + l4;
675 const int l6= stride + l5;
e5c30e06
MN
676// const int l7= stride + l6;
677// const int l8= stride + l7;
678// const int l9= stride + l8;
d5a1a995 679 int x;
3407a972 680 const int QP15= QP + (QP>>2);
acced553 681 src+= stride*3;
d5a1a995 682 for(x=0; x<BLOCK_SIZE; x++)
13e00528 683 {
3407a972
MN
684 const int v = (src[x+l5] - src[x+l4]);
685 if(ABS(v) < QP15)
13e00528 686 {
3407a972
MN
687 src[x+l3] +=v>>3;
688 src[x+l4] +=v>>1;
689 src[x+l5] -=v>>1;
690 src[x+l6] -=v>>3;
13e00528 691
13e00528 692 }
13e00528
A
693 }
694
695#endif
696}
697
698/**
699 * Experimental Filter 1
9f45d04d
MN
700 * will not damage linear gradients
701 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
d5a1a995
MN
702 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
703 * MMX2 version does correct clipping C version doesnt
13e00528
A
704 */
705static inline void vertX1Filter(uint8_t *src, int stride, int QP)
706{
d5a1a995 707#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
acced553
MN
708 src+= stride*3;
709
13e00528 710 asm volatile(
d5a1a995
MN
711 "pxor %%mm7, %%mm7 \n\t" // 0
712// "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
713 "leal (%0, %1), %%eax \n\t"
714 "leal (%%eax, %1, 4), %%ebx \n\t"
715// 0 1 2 3 4 5 6 7 8 9
716// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
717 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3
718 "movq (%0, %1, 4), %%mm1 \n\t" // line 4
719 "movq %%mm1, %%mm2 \n\t" // line 4
720 "psubusb %%mm0, %%mm1 \n\t"
721 "psubusb %%mm2, %%mm0 \n\t"
722 "por %%mm1, %%mm0 \n\t" // |l2 - l3|
723 "movq (%%ebx), %%mm3 \n\t" // line 5
724 "movq (%%ebx, %1), %%mm4 \n\t" // line 6
725 "movq %%mm3, %%mm5 \n\t" // line 5
726 "psubusb %%mm4, %%mm3 \n\t"
727 "psubusb %%mm5, %%mm4 \n\t"
728 "por %%mm4, %%mm3 \n\t" // |l5 - l6|
729 PAVGB(%%mm3, %%mm0) // (|l2 - l3| + |l5 - l6|)/2
730 "movq %%mm2, %%mm1 \n\t" // line 4
731 "psubusb %%mm5, %%mm2 \n\t"
732 "movq %%mm2, %%mm4 \n\t"
733 "pcmpeqb %%mm7, %%mm2 \n\t" // (l4 - l5) <= 0 ? -1 : 0
734 "psubusb %%mm1, %%mm5 \n\t"
735 "por %%mm5, %%mm4 \n\t" // |l4 - l5|
736 "psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
737 "movq %%mm4, %%mm3 \n\t" // d
738 "psubusb pQPb, %%mm4 \n\t"
739 "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
9f45d04d 740 "psubusb b01, %%mm3 \n\t"
d5a1a995
MN
741 "pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0
742
743 PAVGB(%%mm7, %%mm3) // d/2
9f45d04d
MN
744 "movq %%mm3, %%mm1 \n\t" // d/2
745 PAVGB(%%mm7, %%mm3) // d/4
746 PAVGB(%%mm1, %%mm3) // 3*d/8
d5a1a995
MN
747
748 "movq (%0, %1, 4), %%mm0 \n\t" // line 4
749 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
750 "psubusb %%mm3, %%mm0 \n\t"
751 "pxor %%mm2, %%mm0 \n\t"
752 "movq %%mm0, (%0, %1, 4) \n\t" // line 4
753
754 "movq (%%ebx), %%mm0 \n\t" // line 5
755 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
756 "paddusb %%mm3, %%mm0 \n\t"
757 "pxor %%mm2, %%mm0 \n\t"
758 "movq %%mm0, (%%ebx) \n\t" // line 5
759
9f45d04d 760 PAVGB(%%mm7, %%mm1) // d/4
d5a1a995
MN
761
762 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3
763 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
9f45d04d 764 "psubusb %%mm1, %%mm0 \n\t"
d5a1a995
MN
765 "pxor %%mm2, %%mm0 \n\t"
766 "movq %%mm0, (%%eax, %1, 2) \n\t" // line 3
767
768 "movq (%%ebx, %1), %%mm0 \n\t" // line 6
769 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
9f45d04d 770 "paddusb %%mm1, %%mm0 \n\t"
d5a1a995
MN
771 "pxor %%mm2, %%mm0 \n\t"
772 "movq %%mm0, (%%ebx, %1) \n\t" // line 6
773
9f45d04d 774 PAVGB(%%mm7, %%mm1) // d/8
d5a1a995
MN
775
776 "movq (%%eax, %1), %%mm0 \n\t" // line 2
777 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
9f45d04d 778 "psubusb %%mm1, %%mm0 \n\t"
d5a1a995
MN
779 "pxor %%mm2, %%mm0 \n\t"
780 "movq %%mm0, (%%eax, %1) \n\t" // line 2
781
782 "movq (%%ebx, %1, 2), %%mm0 \n\t" // line 7
783 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
9f45d04d 784 "paddusb %%mm1, %%mm0 \n\t"
d5a1a995
MN
785 "pxor %%mm2, %%mm0 \n\t"
786 "movq %%mm0, (%%ebx, %1, 2) \n\t" // line 7
13e00528
A
787
788 :
789 : "r" (src), "r" (stride)
790 : "%eax", "%ebx"
791 );
792#else
d5a1a995
MN
793
794 const int l1= stride;
795 const int l2= stride + l1;
796 const int l3= stride + l2;
797 const int l4= stride + l3;
798 const int l5= stride + l4;
799 const int l6= stride + l5;
800 const int l7= stride + l6;
e5c30e06
MN
801// const int l8= stride + l7;
802// const int l9= stride + l8;
d5a1a995 803 int x;
acced553
MN
804
805 src+= stride*3;
d5a1a995
MN
806 for(x=0; x<BLOCK_SIZE; x++)
807 {
808 int a= src[l3] - src[l4];
809 int b= src[l4] - src[l5];
9f45d04d 810 int c= src[l5] - src[l6];
d5a1a995 811
3407a972
MN
812 int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
813 d= MAX(d, 0);
d5a1a995
MN
814
815 if(d < QP)
816 {
817 int v = d * SIGN(-b);
818
3407a972
MN
819 src[l2] +=v>>3;
820 src[l3] +=v>>2;
821 src[l4] +=(3*v)>>3;
822 src[l5] -=(3*v)>>3;
823 src[l6] -=v>>2;
824 src[l7] -=v>>3;
d5a1a995
MN
825
826 }
827 src++;
828 }
829 /*
13e00528
A
830 const int l1= stride;
831 const int l2= stride + l1;
832 const int l3= stride + l2;
833 const int l4= stride + l3;
834 const int l5= stride + l4;
835 const int l6= stride + l5;
836 const int l7= stride + l6;
837 const int l8= stride + l7;
838 const int l9= stride + l8;
839 for(int x=0; x<BLOCK_SIZE; x++)
840 {
841 int v2= src[l2];
842 int v3= src[l3];
843 int v4= src[l4];
844 int v5= src[l5];
845 int v6= src[l6];
846 int v7= src[l7];
847
848 if(ABS(v4-v5)<QP && ABS(v4-v5) - (ABS(v3-v4) + ABS(v5-v6))>0 )
849 {
850 src[l3] = (6*v2 + 4*v3 + 3*v4 + 2*v5 + v6 )/16;
851 src[l4] = (3*v2 + 3*v3 + 4*v4 + 3*v5 + 2*v6 + v7 )/16;
852 src[l5] = (1*v2 + 2*v3 + 3*v4 + 4*v5 + 3*v6 + 3*v7)/16;
853 src[l6] = ( 1*v3 + 2*v4 + 3*v5 + 4*v6 + 6*v7)/16;
854 }
855 src++;
856 }
d5a1a995 857*/
13e00528
A
858#endif
859}
860
9f45d04d
MN
861/**
862 * Experimental Filter 1 (Horizontal)
863 * will not damage linear gradients
864 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
865 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
866 * MMX2 version does correct clipping C version doesnt
867 * not identical with the vertical one
868 */
869static inline void horizX1Filter(uint8_t *src, int stride, int QP)
870{
871 int y;
872 static uint64_t *lut= NULL;
873 if(lut==NULL)
874 {
875 int i;
876 lut= (uint64_t*)memalign(8, 256*8);
877 for(i=0; i<256; i++)
878 {
879 int v= i < 128 ? 2*i : 2*(i-256);
880/*
881//Simulate 112242211 9-Tap filter
882 uint64_t a= (v/16) & 0xFF;
883 uint64_t b= (v/8) & 0xFF;
884 uint64_t c= (v/4) & 0xFF;
885 uint64_t d= (3*v/8) & 0xFF;
886*/
887//Simulate piecewise linear interpolation
888 uint64_t a= (v/16) & 0xFF;
889 uint64_t b= (v*3/16) & 0xFF;
890 uint64_t c= (v*5/16) & 0xFF;
891 uint64_t d= (7*v/16) & 0xFF;
892 uint64_t A= (0x100 - a)&0xFF;
893 uint64_t B= (0x100 - b)&0xFF;
894 uint64_t C= (0x100 - c)&0xFF;
895 uint64_t D= (0x100 - c)&0xFF;
896
897 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
898 (D<<24) | (C<<16) | (B<<8) | (A);
899 //lut[i] = (v<<32) | (v<<24);
900 }
901 }
902
4e4dcbc5 903#if 0
9f45d04d
MN
904 asm volatile(
905 "pxor %%mm7, %%mm7 \n\t" // 0
906// "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE
907 "leal (%0, %1), %%eax \n\t"
908 "leal (%%eax, %1, 4), %%ebx \n\t"
909
910 "movq b80, %%mm6 \n\t"
79cccf70 911 "movd pQPb, %%mm5 \n\t" // QP
9f45d04d
MN
912 "movq %%mm5, %%mm4 \n\t"
913 "paddusb %%mm5, %%mm5 \n\t" // 2QP
914 "paddusb %%mm5, %%mm4 \n\t" // 3QP
915 "pxor %%mm5, %%mm5 \n\t" // 0
916 "psubb %%mm4, %%mm5 \n\t" // -3QP
917 "por bm11111110, %%mm5 \n\t" // ...,FF,FF,-3QP
918 "psllq $24, %%mm5 \n\t"
919
920// 0 1 2 3 4 5 6 7 8 9
921// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
922
923#define HX1old(a) \
924 "movd " #a ", %%mm0 \n\t"\
925 "movd 4" #a ", %%mm1 \n\t"\
926 "punpckldq %%mm1, %%mm0 \n\t"\
927 "movq %%mm0, %%mm1 \n\t"\
928 "movq %%mm0, %%mm2 \n\t"\
929 "psrlq $8, %%mm1 \n\t"\
930 "psubusb %%mm1, %%mm2 \n\t"\
931 "psubusb %%mm0, %%mm1 \n\t"\
932