Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/jxr/image/decode/strInvTransform.c
4393 views
1
//*@@@+++@@@@******************************************************************
2
//
3
// Copyright © Microsoft Corp.
4
// All rights reserved.
5
//
6
// Redistribution and use in source and binary forms, with or without
7
// modification, are permitted provided that the following conditions are met:
8
//
9
// • Redistributions of source code must retain the above copyright notice,
10
// this list of conditions and the following disclaimer.
11
// • Redistributions in binary form must reproduce the above copyright notice,
12
// this list of conditions and the following disclaimer in the documentation
13
// and/or other materials provided with the distribution.
14
//
15
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25
// POSSIBILITY OF SUCH DAMAGE.
26
//
27
//*@@@---@@@@******************************************************************
28
29
#include "strTransform.h"
30
#include "strcodec.h"
31
#include "decode.h"
32
33
/** rotation by -pi/8 **/
34
#define IROTATE1(a, b) (a) -= (((b) + 1) >> 1), (b) += (((a) + 1) >> 1) // this works well too
35
#define IROTATE2(a, b) (a) -= (((b)*3 + 4) >> 3), (b) += (((a)*3 + 4) >> 3) // this works well too
36
37
/** local functions **/
38
static Void invOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);
39
static Void invOddOddPost(PixelI *, PixelI *, PixelI *, PixelI *);
40
static Void invOdd(PixelI *, PixelI *, PixelI *, PixelI *);
41
static Void strHSTdec(PixelI *, PixelI *, PixelI *, PixelI *);
42
static Void strHSTdec1(PixelI *, PixelI *);
43
static Void strHSTdec1_alternate(PixelI *, PixelI *);
44
static Void strHSTdec1_edge(PixelI *pa, PixelI *pd);
45
46
/** IDCT stuff **/
47
/** reordering should be combined with zigzag scan **/
48
/** data order before IDCT **/
49
/** 0 8 4 6 **/
50
/** 2 10 14 12 **/
51
/** 1 11 15 13 **/
52
/** 9 3 7 5 **/
53
/** data order after IDCT **/
54
/** 0 1 2 3 **/
55
/** 4 5 6 7 **/
56
/** 8 9 10 11 **/
57
/** 12 13 14 15 **/
58
Void strIDCT4x4Stage1(PixelI* p)
59
{
60
/** top left corner, butterfly => butterfly **/
61
strDCT2x2up(p + 0, p + 1, p + 2, p + 3);
62
63
/** top right corner, -pi/8 rotation => butterfly **/
64
invOdd(p + 5, p + 4, p + 7, p + 6);
65
66
/** bottom left corner, butterfly => -pi/8 rotation **/
67
invOdd(p + 10, p + 8, p + 11, p + 9);
68
69
/** bottom right corner, -pi/8 rotation => -pi/8 rotation **/
70
invOddOdd(p + 15, p + 14, p + 13, p + 12);
71
72
/** butterfly **/
73
//FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
74
FOURBUTTERFLY_HARDCODED1(p);
75
}
76
77
Void strIDCT4x4Stage2(PixelI* p)
78
{
79
/** bottom left corner, butterfly => -pi/8 rotation **/
80
invOdd(p + 32, p + 48, p + 96, p + 112);
81
82
/** top right corner, -pi/8 rotation => butterfly **/
83
invOdd(p + 128, p + 192, p + 144, p + 208);
84
85
/** bottom right corner, -pi/8 rotation => -pi/8 rotation **/
86
invOddOdd(p + 160, p + 224, p + 176, p + 240);
87
88
/** top left corner, butterfly => butterfly **/
89
strDCT2x2up(p + 0, p + 64, p + 16, p + 80);
90
91
/** butterfly **/
92
FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176, 16, 208, 32, 224, 80, 144, 96, 160);
93
}
94
95
Void strNormalizeDec(PixelI* p, Bool bChroma)
96
{
97
int i;
98
if (!bChroma) {
99
//for (i = 0; i < 256; i += 16) {
100
// p[i] <<= 2;
101
//}
102
}
103
else {
104
for (i = 0; i < 256; i += 16) {
105
p[i] += p[i];
106
}
107
}
108
}
109
110
/** 2x2 DCT with post-scaling - for use on decoder side **/
111
Void strDCT2x2dnDec(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
112
{
113
PixelI a, b, c, d, C, t;
114
a = *pa;
115
b = *pb;
116
C = *pc;
117
d = *pd;
118
119
a += d;
120
b -= C;
121
t = ((a - b) >> 1);
122
c = t - d;
123
d = t - C;
124
a -= d;
125
b += c;
126
127
*pa = a * 2;
128
*pb = b * 2;
129
*pc = c * 2;
130
*pd = d * 2;
131
}
132
133
134
/** post filter stuff **/
135
/** 2-point post for boundaries **/
136
Void strPost2(PixelI * a, PixelI * b)
137
{
138
*b += ((*a + 4) >> 3);
139
*a += ((*b + 2) >> 2);
140
*b += ((*a + 4) >> 3);
141
}
142
143
Void strPost2_alternate(PixelI * pa, PixelI * pb)
144
{
145
PixelI a, b;
146
a = *pa;
147
b = *pb;
148
149
/** rotate **/
150
b += ((a + 2) >> 2);
151
a += ((b + 1) >> 1);
152
a += (b >> 5);
153
a += (b >> 9);
154
a += (b >> 13);
155
156
b += ((a + 2) >> 2);
157
158
*pa = a;
159
*pb = b;
160
}
161
162
Void strPost2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
163
{
164
PixelI a, b, c, d;
165
a = *pa;
166
b = *pb;
167
c = *pc;
168
d = *pd;
169
170
/** butterflies **/
171
a += d;
172
b += c;
173
d -= (a + 1) >> 1;
174
c -= (b + 1) >> 1;
175
176
/** rotate **/
177
b += ((a + 2) >> 2);
178
a += ((b + 1) >> 1);
179
b += ((a + 2) >> 2);
180
181
/** butterflies **/
182
d += (a + 1) >> 1;
183
c += (b + 1) >> 1;
184
a -= d;
185
b -= c;
186
187
*pa = a;
188
*pb = b;
189
*pc = c;
190
*pd = d;
191
}
192
193
Void strPost2x2_alternate(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
194
{
195
PixelI a, b, c, d;
196
a = *pa;
197
b = *pb;
198
c = *pc;
199
d = *pd;
200
201
/** butterflies **/
202
a += d;
203
b += c;
204
d -= (a + 1) >> 1;
205
c -= (b + 1) >> 1;
206
207
/** rotate **/
208
b += ((a + 2) >> 2);
209
a += ((b + 1) >> 1);
210
a += (b >> 5);
211
a += (b >> 9);
212
a += (b >> 13);
213
b += ((a + 2) >> 2);
214
215
/** butterflies **/
216
d += (a + 1) >> 1;
217
c += (b + 1) >> 1;
218
a -= d;
219
b -= c;
220
221
*pa = a;
222
*pb = b;
223
*pc = c;
224
*pd = d;
225
}
226
227
/** 4-point post for boundaries **/
228
Void strPost4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
229
{
230
PixelI a, b, c, d;
231
a = *pa;
232
b = *pb;
233
c = *pc;
234
d = *pd;
235
236
a += d, b += c;
237
d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
238
239
IROTATE1(c, d);
240
241
d += ((a + 1) >> 1), c += ((b + 1) >> 1);
242
a -= d - ((d * 3 + 16) >> 5), b -= c - ((c * 3 + 16) >> 5);
243
d += ((a * 3 + 8) >> 4), c += ((b * 3 + 8) >> 4);
244
a += ((d * 3 + 16) >> 5), b += ((c * 3 + 16) >> 5);
245
246
*pa = a;
247
*pb = b;
248
*pc = c;
249
*pd = d;
250
}
251
252
Void strPost4_alternate(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
253
{
254
PixelI a, b, c, d;
255
a = *pa;
256
b = *pb;
257
c = *pc;
258
d = *pd;
259
260
a += d, b += c;
261
d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
262
263
strHSTdec1_edge(&a, &d); strHSTdec1_edge(&b, &c);
264
IROTATE1(c, d);
265
d += ((a + 1) >> 1), c += ((b + 1) >> 1);
266
267
a -= d, b -= c;
268
269
*pa = a;
270
*pb = b;
271
*pc = c;
272
*pd = d;
273
}
274
275
/*****************************************************************************************
276
Input data offsets:
277
(15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)
278
(13)(12)|( 8+64)( 9+64) (13)(12)|(72)(73)
279
--------+-------------- --------+--------
280
( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)
281
( 7)( 6)|( 2+64) (3+64) ( 7)( 6)|(66)(67)
282
*****************************************************************************************/
283
Void DCCompensate (PixelI *a, PixelI *b, PixelI *c, PixelI *d, int iDC)
284
{
285
iDC = iDC>>1;
286
*a -= iDC;
287
*d -= iDC;
288
*b += iDC;
289
*c += iDC;
290
}
291
292
#ifndef max
293
#define max(a,b) (((a) > (b)) ? (a) : (b))
294
#endif
295
296
#ifndef min
297
#define min(a,b) (((a) < (b)) ? (a) : (b))
298
#endif
299
300
int ClipDCL(int iDCL, int iAltDCL)
301
{
302
int iClipDCL = 0;
303
if (iDCL > 0) {
304
if (iAltDCL > 0)
305
iClipDCL = min(iDCL, iAltDCL);
306
else
307
iClipDCL = 0;
308
}
309
else if (iDCL < 0) {
310
if (iAltDCL < 0)
311
iClipDCL = max(iDCL, iAltDCL);
312
else
313
iClipDCL = 0;
314
}
315
return iClipDCL;
316
}
317
318
Void strPost4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset, Int iHPQP, Bool bHPAbsent)
319
{
320
int iDCLAlt1, iDCLAlt2, iDCLAlt3, iDCLAlt0;
321
int iDCL1, iDCL2, iDCL3, iDCL0;
322
int iTmp1, iTmp2, iTmp3, iTmp0;
323
324
PixelI *p2 = p0 + 72 - iOffset;
325
PixelI *p3 = p1 + 64 - iOffset;
326
p0 += 12;
327
p1 += 4;
328
329
/** buttefly **/
330
strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
331
strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
332
strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
333
strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
334
335
/** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
336
invOddOddPost(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
337
338
/** anti diagonal corners: rotation by -pi/8 **/
339
IROTATE1(p1[2], p1[3]);
340
IROTATE1(p1[0], p1[1]);
341
IROTATE1(p2[1], p2[3]);
342
IROTATE1(p2[0], p2[2]);
343
344
/** butterfly **/
345
strHSTdec1(p0 + 0, p3 + 0);
346
strHSTdec1(p0 + 1, p3 + 1);
347
strHSTdec1(p0 + 2, p3 + 2);
348
strHSTdec1(p0 + 3, p3 + 3);
349
strHSTdec(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
350
strHSTdec(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
351
strHSTdec(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
352
strHSTdec(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
353
354
iTmp0 = (*(p0 +0) + *(p1 +0) + *(p2 +0) + *(p3 +0))>>1;
355
iTmp1 = (*(p0 +1) + *(p1 +1) + *(p2 +1) + *(p3 +1))>>1;
356
iTmp2 = (*(p0 +2) + *(p1 +2) + *(p2 +2) + *(p3 +2))>>1;
357
iTmp3 = (*(p0 +3) + *(p1 +3) + *(p2 +3) + *(p3 +3))>>1;
358
iDCL0 = (iTmp0 * 595 + 65536)>>17; //Approximating 27/5947
359
iDCL1 = (iTmp1 * 595 + 65536)>>17;
360
iDCL2 = (iTmp2 * 595 + 65536)>>17;
361
iDCL3 = (iTmp3 * 595 + 65536)>>17;
362
if ((abs(iDCL0) < iHPQP && iHPQP > 20) || bHPAbsent) {
363
iDCLAlt0 = (*(p0 +0) - *(p1 +0) - *(p2 +0) + *(p3 +0))>>1;
364
iDCL0 = ClipDCL (iDCL0, iDCLAlt0);
365
DCCompensate (p0 + 0, p2 + 0, p1 + 0, p3 + 0, iDCL0);
366
}
367
if ((abs(iDCL1) < iHPQP && iHPQP > 20) || bHPAbsent) {
368
iDCLAlt1 = (*(p0 +1) - *(p1 +1) - *(p2 +1) + *(p3 +1))>>1;
369
iDCL1 = ClipDCL (iDCL1, iDCLAlt1);
370
DCCompensate (p0 + 1, p2 + 1, p1 + 1, p3 + 1, iDCL1);
371
}
372
if ((abs(iDCL2) < iHPQP && iHPQP > 20) || bHPAbsent) {
373
iDCLAlt2 = (*(p0 +2) - *(p1 +2) - *(p2 +2) + *(p3 +2))>>1;
374
iDCL2 = ClipDCL (iDCL2, iDCLAlt2);
375
DCCompensate (p0 + 2, p2 + 2, p1 + 2, p3 + 2, iDCL2);
376
}
377
if ((abs(iDCL3) < iHPQP && iHPQP > 20) || bHPAbsent) {
378
iDCLAlt3 = (*(p0 +3) - *(p1 +3) - *(p2 +3) + *(p3 +3))>>1;
379
iDCL3 = ClipDCL (iDCL3, iDCLAlt3);
380
DCCompensate (p0 + 3, p2 + 3, p1 + 3, p3 + 3, iDCL3);
381
}
382
}
383
384
Void strPost4x4Stage1(PixelI* p, Int iOffset, Int iHPQP, Bool bHPAbsent)
385
{
386
strPost4x4Stage1Split(p, p + 16, iOffset, iHPQP, bHPAbsent);
387
}
388
389
Void strPost4x4Stage1Split_alternate(PixelI *p0, PixelI *p1, Int iOffset)
390
{
391
PixelI *p2 = p0 + 72 - iOffset;
392
PixelI *p3 = p1 + 64 - iOffset;
393
p0 += 12;
394
p1 += 4;
395
396
/** buttefly **/
397
strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
398
strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
399
strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
400
strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
401
402
/** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
403
invOddOddPost(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
404
405
/** anti diagonal corners: rotation by -pi/8 **/
406
IROTATE1(p1[2], p1[3]);
407
IROTATE1(p1[0], p1[1]);
408
IROTATE1(p2[1], p2[3]);
409
IROTATE1(p2[0], p2[2]);
410
411
/** butterfly **/
412
strHSTdec1_alternate(p0 + 0, p3 + 0);
413
strHSTdec1_alternate(p0 + 1, p3 + 1);
414
strHSTdec1_alternate(p0 + 2, p3 + 2);
415
strHSTdec1_alternate(p0 + 3, p3 + 3);
416
strHSTdec(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
417
strHSTdec(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
418
strHSTdec(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
419
strHSTdec(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
420
}
421
422
Void strPost4x4Stage1_alternate(PixelI* p, Int iOffset)
423
{
424
strPost4x4Stage1Split_alternate(p, p + 16, iOffset);
425
}
426
427
/*****************************************************************************************
428
Input data offsets:
429
(15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)
430
(13)(12)|( 8+32)( 9+32) (13)(12)|(40)(41)
431
--------+-------------- --------+--------
432
( 5)( 4)|( 0+32) (1+32) p1 ( 5)( 4)|(32)(33)
433
( 7)( 6)|( 2+32) (3+32) ( 7)( 6)|(34)(35)
434
*****************************************************************************************/
435
436
/*****************************************************************************************
437
Input data offsets:
438
( -96)(-32)|(32)( 96) p0
439
( -80)(-16)|(48)(112)
440
-----------+------------
441
(-128)(-64)|( 0)( 64) p1
442
(-112)(-48)|(16)( 80)
443
*****************************************************************************************/
444
Void strPost4x4Stage2Split(PixelI* p0, PixelI* p1)
445
{
446
/** buttefly **/
447
strDCT2x2dn(p0 - 96, p0 + 96, p1 - 112, p1 + 80);
448
strDCT2x2dn(p0 - 32, p0 + 32, p1 - 48, p1 + 16);
449
strDCT2x2dn(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
450
strDCT2x2dn(p0 - 16, p0 + 48, p1 - 64, p1 + 0);
451
452
/** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
453
invOddOddPost(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
454
455
/** anti diagonal corners: rotation by -pi/8 **/
456
IROTATE1(p0[ 48], p0[ 32]);
457
IROTATE1(p0[112], p0[ 96]);
458
IROTATE1(p1[-64], p1[-128]);
459
IROTATE1(p1[-48], p1[-112]);
460
461
/** butterfly **/
462
strHSTdec1(p0 - 96, p1 + 80);
463
strHSTdec1(p0 - 32, p1 + 16);
464
strHSTdec1(p0 - 80, p1 + 64);
465
strHSTdec1(p0 - 16, p1 + 0);
466
467
strHSTdec(p0 - 96, p1 - 112, p0 + 96, p1 + 80);
468
strHSTdec(p0 - 32, p1 - 48, p0 + 32, p1 + 16);
469
strHSTdec(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
470
strHSTdec(p0 - 16, p1 - 64, p0 + 48, p1 + 0);
471
}
472
473
Void strPost4x4Stage2Split_alternate(PixelI* p0, PixelI* p1)
474
{
475
/** buttefly **/
476
strDCT2x2dn(p0 - 96, p0 + 96, p1 - 112, p1 + 80);
477
strDCT2x2dn(p0 - 32, p0 + 32, p1 - 48, p1 + 16);
478
strDCT2x2dn(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
479
strDCT2x2dn(p0 - 16, p0 + 48, p1 - 64, p1 + 0);
480
481
/** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
482
invOddOddPost(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
483
484
/** anti diagonal corners: rotation by -pi/8 **/
485
IROTATE1(p0[ 48], p0[ 32]);
486
IROTATE1(p0[112], p0[ 96]);
487
IROTATE1(p1[-64], p1[-128]);
488
IROTATE1(p1[-48], p1[-112]);
489
490
/** butterfly **/
491
strHSTdec1_alternate(p0 - 96, p1 + 80);
492
strHSTdec1_alternate(p0 - 32, p1 + 16);
493
strHSTdec1_alternate(p0 - 80, p1 + 64);
494
strHSTdec1_alternate(p0 - 16, p1 + 0);
495
496
strHSTdec(p0 - 96, p1 - 112, p0 + 96, p1 + 80);
497
strHSTdec(p0 - 32, p1 - 48, p0 + 32, p1 + 16);
498
strHSTdec(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
499
strHSTdec(p0 - 16, p1 - 64, p0 + 48, p1 + 0);
500
}
501
502
/**
503
Hadamard+Scale transform
504
for some strange reason, breaking up the function into two blocks, strHSTdec1 and strHSTdec
505
seems to work faster
506
**/
507
static Void strHSTdec1(PixelI *pa, PixelI *pd)
508
{
509
/** different realization : does rescaling as well! **/
510
PixelI a, d;
511
a = *pa;
512
d = *pd;
513
514
a += d;
515
d = (a >> 1) - d;
516
a += (d * 3 + 0) >> 3;
517
d += (a * 3 + 0) >> 4;
518
//a += (d * 3 + 4) >> 3;
519
520
*pa = a;
521
*pd = d;
522
}
523
524
static Void strHSTdec1_alternate(PixelI *pa, PixelI *pd)
525
{
526
/** different realization : does rescaling as well! **/
527
PixelI a, d;
528
a = *pa;
529
d = *pd;
530
531
a += d;
532
d = (a >> 1) - d;
533
a += (d * 3 + 0) >> 3;
534
d += (a * 3 + 0) >> 4;
535
//a += (d * 3 + 4) >> 3;
536
537
d += (a >> 7);
538
d -= (a >> 10);
539
540
*pa = a;
541
*pd = d;
542
}
543
544
static Void strHSTdec1_edge (PixelI *pa, PixelI *pd)
545
{
546
/** different realization as compared to scaling operator for 2D case **/
547
PixelI a, d;
548
a = *pa;
549
d = *pd;
550
551
a += d;
552
d = (a >> 1) - d;
553
a += (d * 3 + 0) >> 3;
554
d += (a * 3 + 0) >> 4;
555
556
//Scaling modification of adding 7/1024 in 2 steps (without multiplication by 7).
557
d += (a >> 7);
558
d -= (a >> 10);
559
560
a += (d * 3 + 4) >> 3;
561
d -= (a >> 1);
562
a += d;
563
// End new operations
564
565
*pa = a;
566
*pd = -d; // Negative sign needed here for 1D scaling case to ensure correct scaling.
567
}
568
569
static Void strHSTdec(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
570
{
571
/** different realization : does rescaling as well! **/
572
PixelI a, b, c, d;
573
a = *pa;
574
b = *pb;
575
c = *pc;
576
d = *pd;
577
578
b -= c;
579
a += (d * 3 + 4) >> 3;
580
581
d -= (b >> 1);
582
c = ((a - b) >> 1) - c;
583
*pc = d;
584
*pd = c;
585
*pa = a - c, *pb = b + d;
586
}
587
588
/** Kron(Rotate(pi/8), Rotate(pi/8)) **/
589
static Void invOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
590
{
591
PixelI a, b, c, d, t1, t2;
592
a = *pa;
593
b = *pb;
594
c = *pc;
595
d = *pd;
596
597
/** butterflies **/
598
d += a;
599
c -= b;
600
a -= (t1 = d >> 1);
601
b += (t2 = c >> 1);
602
603
/** rotate pi/4 **/
604
a -= (b * 3 + 3) >> 3;
605
b += (a * 3 + 3) >> 2;
606
a -= (b * 3 + 4) >> 3;
607
608
/** butterflies **/
609
b -= t2;
610
a += t1;
611
c += b;
612
d -= a;
613
614
/** sign flips **/
615
*pa = a;
616
*pb = -b;
617
*pc = -c;
618
*pd = d;
619
}
620
621
/** Kron(Rotate(pi/8), Rotate(pi/8)) **/
622
static Void invOddOddPost(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
623
{
624
PixelI a, b, c, d, t1, t2;
625
a = *pa;
626
b = *pb;
627
c = *pc;
628
d = *pd;
629
630
/** butterflies **/
631
d += a;
632
c -= b;
633
a -= (t1 = d >> 1);
634
b += (t2 = c >> 1);
635
636
/** rotate pi/4 **/
637
a -= (b * 3 + 6) >> 3;
638
b += (a * 3 + 2) >> 2;
639
a -= (b * 3 + 4) >> 3;
640
641
/** butterflies **/
642
b -= t2;
643
a += t1;
644
c += b;
645
d -= a;
646
647
*pa = a;
648
*pb = b;
649
*pc = c;
650
*pd = d;
651
}
652
653
654
/** Kron(Rotate(-pi/8), [1 1; 1 -1]/sqrt(2)) **/
655
/** [D C A B] => [a b c d] **/
656
Void invOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
657
{
658
PixelI a, b, c, d;
659
a = *pa;
660
b = *pb;
661
c = *pc;
662
d = *pd;
663
664
/** butterflies **/
665
b += d;
666
a -= c;
667
d -= (b) >> 1;
668
c += (a + 1) >> 1;
669
670
/** rotate pi/8 **/
671
IROTATE2(a, b);
672
IROTATE2(c, d);
673
674
/** butterflies **/
675
c -= (b + 1) >> 1;
676
d = ((a + 1) >> 1) - d;
677
b += c;
678
a -= d;
679
680
*pa = a;
681
*pb = b;
682
*pc = c;
683
*pd = d;
684
}
685
686
/*************************************************************************
687
Top-level function to inverse tranform possible part of a macroblock
688
*************************************************************************/
689
Int invTransformMacroblock(CWMImageStrCodec * pSC)
690
{
691
const OVERLAP olOverlap = pSC->WMISCP.olOverlap;
692
const COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
693
// const BITDEPTH_BITS bdBitDepth = pSC->WMII.bdBitDepth;
694
const Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
695
const Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
696
const Bool topORbottom = (top || bottom), leftORright = (left || right);
697
const Bool topORleft = (top || left), bottomORright = (bottom || right);
698
const size_t mbWidth = pSC->cmbWidth, mbX = pSC->cColumn;
699
PixelI * p = NULL;// * pt = NULL;
700
size_t i;
701
const size_t iChannels = (cfColorFormat == YUV_420 || cfColorFormat == YUV_422) ? 1 : pSC->m_param.cNumChannels;
702
const size_t tScale = pSC->m_Dparam->cThumbnailScale;
703
Int j = 0;
704
705
Int qp[MAX_CHANNELS], dcqp[MAX_CHANNELS], iStrength = (1 << pSC->WMII.cPostProcStrength);
706
// ERR_CODE result = ICERR_OK;
707
708
Bool bHPAbsent = (pSC->WMISCP.sbSubband == SB_NO_HIGHPASS || pSC->WMISCP.sbSubband == SB_DC_ONLY);
709
710
if(pSC->WMII.cPostProcStrength > 0){
711
// threshold for post processing
712
for(i = 0; i < iChannels; i ++){
713
qp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerLP[i][pSC->MBInfo.iQIndexLP].iQP * iStrength * (olOverlap == OL_NONE ? 2 : 1);
714
dcqp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerDC[i][0].iQP * iStrength;
715
}
716
717
if(left) // a new MB row
718
slideOneMBRow(pSC->pPostProcInfo, pSC->m_param.cNumChannels, mbWidth, top, bottom); // previous current row becomes previous row
719
}
720
721
//================================================================
722
// 400_Y, 444_YUV
723
for (i = 0; i < iChannels && tScale < 16; ++i)
724
{
725
PixelI* const p0 = pSC->p0MBbuffer[i];
726
PixelI* const p1 = pSC->p1MBbuffer[i];
727
728
Int iHPQP = 255;
729
if (!bHPAbsent)
730
iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
731
732
//================================
733
// second level inverse transform
734
if (!bottomORright)
735
{
736
if(pSC->WMII.cPostProcStrength > 0)
737
updatePostProcInfo(pSC->pPostProcInfo, p1, mbX, i); // update postproc info before IDCT
738
739
strIDCT4x4Stage2(p1);
740
if (pSC->m_param.bScaledArith) {
741
strNormalizeDec(p1, (i != 0));
742
}
743
}
744
745
//================================
746
// second level inverse overlap
747
if (OL_TWO == olOverlap)
748
{
749
if (leftORright && (!topORbottom))
750
{
751
j = left ? 0 : -128;
752
strPost4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
753
strPost4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
754
}
755
756
if (!leftORright)
757
{
758
if (topORbottom)
759
{
760
p = top ? p1 : p0 + 32;
761
strPost4(p - 128, p - 64, p + 0, p + 64);
762
strPost4(p - 112, p - 48, p + 16, p + 80);
763
p = NULL;
764
}
765
else
766
{
767
strPost4x4Stage2Split(p0, p1);
768
}
769
}
770
}
771
772
if(pSC->WMII.cPostProcStrength > 0)
773
postProcMB(pSC->pPostProcInfo, p0, p1, mbX, i, dcqp[i]); // second stage deblocking
774
775
//================================
776
// first level inverse transform
777
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
778
continue;
779
780
if (!top)
781
{
782
for (j = (left ? 32 : -96); j < (right ? 32 : 160); j += 64)
783
{
784
strIDCT4x4Stage1(p0 + j + 0);
785
strIDCT4x4Stage1(p0 + j + 16);
786
}
787
}
788
789
if (!bottom)
790
{
791
for (j = (left ? 0 : -128); j < (right ? 0 : 128); j += 64)
792
{
793
strIDCT4x4Stage1(p1 + j + 0);
794
strIDCT4x4Stage1(p1 + j + 16);
795
}
796
}
797
798
//================================
799
// first level inverse overlap
800
if (OL_NONE != olOverlap)
801
{
802
if (leftORright)
803
{
804
j = left ? 0 + 10 : -64 + 14;
805
if (!top)
806
{
807
p = p0 + 16 + j;
808
strPost4(p + 0, p - 2, p + 6, p + 8);
809
strPost4(p + 1, p - 1, p + 7, p + 9);
810
strPost4(p + 16, p + 14, p + 22, p + 24);
811
strPost4(p + 17, p + 15, p + 23, p + 25);
812
p = NULL;
813
}
814
if (!bottom)
815
{
816
p = p1 + j;
817
strPost4(p + 0, p - 2, p + 6, p + 8);
818
strPost4(p + 1, p - 1, p + 7, p + 9);
819
p = NULL;
820
}
821
if (!topORbottom)
822
{
823
strPost4(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
824
strPost4(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
825
}
826
}
827
828
if (top)
829
{
830
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
831
{
832
p = p1 + j;
833
strPost4(p + 5, p + 4, p + 64, p + 65);
834
strPost4(p + 7, p + 6, p + 66, p + 67);
835
p = NULL;
836
837
strPost4x4Stage1(p1 + j, 0, iHPQP, bHPAbsent);
838
}
839
}
840
else if (bottom)
841
{
842
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
843
{
844
strPost4x4Stage1(p0 + 16 + j, 0, iHPQP, bHPAbsent);
845
strPost4x4Stage1(p0 + 32 + j, 0, iHPQP, bHPAbsent);
846
847
p = p0 + 48 + j;
848
strPost4(p + 15, p + 14, p + 74, p + 75);
849
strPost4(p + 13, p + 12, p + 72, p + 73);
850
p = NULL;
851
}
852
}
853
else
854
{
855
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
856
{
857
strPost4x4Stage1(p0 + 16 + j, 0, iHPQP, bHPAbsent);
858
strPost4x4Stage1(p0 + 32 + j, 0, iHPQP, bHPAbsent);
859
strPost4x4Stage1Split(p0 + 48 + j, p1 + j, 0, iHPQP, bHPAbsent);
860
strPost4x4Stage1(p1 + j, 0, iHPQP, bHPAbsent);
861
}
862
}
863
}
864
865
if(pSC->WMII.cPostProcStrength > 0 && (!topORleft))
866
postProcBlock(pSC->pPostProcInfo, p0, p1, mbX, i, qp[i]); // destairing and first stage deblocking
867
}
868
869
//================================================================
870
// 420_UV
871
for (i = 0; i < (YUV_420 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
872
{
873
PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
874
PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
875
876
Int iHPQP = 255;
877
if (!bHPAbsent)
878
iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
879
880
//========================================
881
// second level inverse transform (420_UV)
882
if (!bottomORright)
883
{
884
if (!pSC->m_param.bScaledArith) {
885
strDCT2x2dn(p1, p1 + 32, p1 + 16, p1 + 48);
886
}
887
else {
888
strDCT2x2dnDec(p1, p1 + 32, p1 + 16, p1 + 48);
889
}
890
}
891
892
//========================================
893
// second level inverse overlap (420_UV)
894
if (OL_TWO == olOverlap)
895
{
896
if (leftORright && !topORbottom)
897
{
898
j = (left ? 0 : -32);
899
strPost2(p0 + j + 16, p1 + j);
900
}
901
902
if (!leftORright)
903
{
904
if (topORbottom)
905
{
906
p = (top ? p1 : p0 + 16);
907
strPost2(p - 32, p);
908
p = NULL;
909
}
910
else{
911
strPost2x2(p0 - 16, p0 + 16, p1 - 32, p1);
912
}
913
}
914
}
915
916
//========================================
917
// first level inverse transform (420_UV)
918
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
919
continue;
920
921
if (!top)
922
{
923
for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)
924
{
925
strIDCT4x4Stage1(p0 + j);
926
}
927
}
928
929
if (!bottom)
930
{
931
for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)
932
{
933
strIDCT4x4Stage1(p1 + j);
934
}
935
}
936
937
//========================================
938
// first level inverse overlap (420_UV)
939
if (OL_NONE != olOverlap)
940
{
941
if(!left && !top)
942
{
943
if (bottom)
944
{
945
for (j = -48; j < (right ? -16 : 16); j += 32)
946
{
947
p = p0 + j;
948
strPost4(p + 15, p + 14, p + 42, p + 43);
949
strPost4(p + 13, p + 12, p + 40, p + 41);
950
p = NULL;
951
}
952
}
953
else
954
{
955
for (j = -48; j < (right ? -16 : 16); j += 32)
956
{
957
strPost4x4Stage1Split(p0 + j, p1 - 16 + j, 32, iHPQP, bHPAbsent);
958
}
959
}
960
961
if (right)
962
{
963
if (!bottom)
964
{
965
strPost4(p0 - 2 , p0 - 4 , p1 - 28, p1 - 26);
966
strPost4(p0 - 1 , p0 - 3 , p1 - 27, p1 - 25);
967
}
968
969
strPost4(p0 - 18, p0 - 20, p0 - 12, p0 - 10);
970
strPost4(p0 - 17, p0 - 19, p0 - 11, p0 - 9);
971
}
972
else
973
{
974
strPost4x4Stage1(p0 - 32, 32, iHPQP, bHPAbsent);
975
}
976
977
strPost4x4Stage1(p0 - 64, 32, iHPQP, bHPAbsent);
978
}
979
else if (top)
980
{
981
for (j = (left ? 0: -64); j < (right ? -32: 0); j += 32)
982
{
983
p = p1 + j + 4;
984
strPost4(p + 1, p + 0, p + 28, p + 29);
985
strPost4(p + 3, p + 2, p + 30, p + 31);
986
p = NULL;
987
}
988
}
989
else if (left)
990
{
991
if (!bottom)
992
{
993
strPost4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);
994
strPost4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);
995
}
996
997
strPost4(p0 + 10, p0 + 8, p0 + 16, p0 + 18);
998
strPost4(p0 + 11, p0 + 9, p0 + 17, p0 + 19);
999
}
1000
}
1001
}
1002
1003
//================================================================
1004
// 422_UV
1005
for (i = 0; i < (YUV_422 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
1006
{
1007
PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
1008
PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
1009
1010
Int iHPQP = 255;
1011
if (!bHPAbsent)
1012
iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
1013
1014
//========================================
1015
// second level inverse transform (422_UV)
1016
if ((!bottomORright) && pSC->m_Dparam->cThumbnailScale < 16)
1017
{
1018
// 1D lossless HT
1019
p1[0] -= ((p1[32] + 1) >> 1);
1020
p1[32] += p1[0];
1021
1022
if (!pSC->m_param.bScaledArith) {
1023
strDCT2x2dn(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
1024
strDCT2x2dn(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
1025
}
1026
else {
1027
strDCT2x2dnDec(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
1028
strDCT2x2dnDec(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
1029
}
1030
}
1031
1032
//========================================
1033
// second level inverse overlap (422_UV)
1034
if (OL_TWO == olOverlap)
1035
{
1036
if (!bottom)
1037
{
1038
if (leftORright)
1039
{
1040
if (!top)
1041
{
1042
j = (left ? 0 : -64);
1043
strPost2(p0 + 48 + j, p1 + j);
1044
}
1045
1046
j = (left ? 16 : -48);
1047
strPost2(p1 + j, p1 + j + 16);
1048
}
1049
else
1050
{
1051
if (top)
1052
{
1053
strPost2(p1 - 64, p1);
1054
}
1055
else
1056
{
1057
strPost2x2(p0 - 16, p0 + 48, p1 - 64, p1);
1058
}
1059
1060
strPost2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
1061
}
1062
}
1063
else if (!leftORright)
1064
{
1065
strPost2(p0 - 16, p0 + 48);
1066
}
1067
}
1068
1069
//========================================
1070
// first level inverse transform (422_UV)
1071
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
1072
continue;
1073
1074
if (!top)
1075
{
1076
for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)
1077
{
1078
strIDCT4x4Stage1(p0 + j);
1079
}
1080
}
1081
1082
if (!bottom)
1083
{
1084
for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)
1085
{
1086
strIDCT4x4Stage1(p1 + j + 0);
1087
strIDCT4x4Stage1(p1 + j + 16);
1088
strIDCT4x4Stage1(p1 + j + 32);
1089
}
1090
}
1091
1092
//========================================
1093
// first level inverse overlap (422_UV)
1094
if (OL_NONE != olOverlap)
1095
{
1096
if (!top)
1097
{
1098
if (leftORright)
1099
{
1100
j = (left ? 32 + 10 : -32 + 14);
1101
1102
p = p0 + j;
1103
strPost4(p + 0, p - 2, p + 6, p + 8);
1104
strPost4(p + 1, p - 1, p + 7, p + 9);
1105
1106
p = NULL;
1107
}
1108
1109
for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
1110
{
1111
strPost4x4Stage1(p0 + j + 32, 0, iHPQP, bHPAbsent);
1112
}
1113
}
1114
1115
if (!bottom)
1116
{
1117
if (leftORright)
1118
{
1119
j = (left ? 0 + 10 : -64 + 14);
1120
1121
p = p1 + j;
1122
strPost4(p + 0, p - 2, p + 6, p + 8);
1123
strPost4(p + 1, p - 1, p + 7, p + 9);
1124
1125
p += 16;
1126
strPost4(p + 0, p - 2, p + 6, p + 8);
1127
strPost4(p + 1, p - 1, p + 7, p + 9);
1128
1129
p = NULL;
1130
}
1131
1132
for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
1133
{
1134
strPost4x4Stage1(p1 + j + 0, 0, iHPQP, bHPAbsent);
1135
strPost4x4Stage1(p1 + j + 16, 0, iHPQP, bHPAbsent);
1136
}
1137
}
1138
1139
if (topORbottom)
1140
{
1141
p = (top ? p1 + 5 : p0 + 48 + 13);
1142
for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
1143
{
1144
strPost4(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
1145
strPost4(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
1146
}
1147
p = NULL;
1148
}
1149
else
1150
{
1151
if (leftORright)
1152
{
1153
j = (left ? 0 + 0 : -64 + 4);
1154
strPost4(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
1155
strPost4(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
1156
}
1157
1158
for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
1159
{
1160
strPost4x4Stage1Split(p0 + j + 48, p1 + j + 0, 0, iHPQP, bHPAbsent);
1161
}
1162
}
1163
}
1164
}
1165
1166
return ICERR_OK;
1167
}
1168
1169
Int invTransformMacroblock_alteredOperators_hard(CWMImageStrCodec * pSC)
1170
{
1171
const OVERLAP olOverlap = pSC->WMISCP.olOverlap;
1172
const COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
1173
// const BITDEPTH_BITS bdBitDepth = pSC->WMII.bdBitDepth;
1174
const Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
1175
const Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
1176
const Bool topORbottom = (top || bottom), leftORright = (left || right);
1177
const Bool topORleft = (top || left), bottomORright = (bottom || right);
1178
Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);
1179
// Bool topAdjacentRow = (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);
1180
const size_t mbWidth = pSC->cmbWidth;
1181
PixelI * p = NULL;// * pt = NULL;
1182
size_t i;
1183
const size_t iChannels = (cfColorFormat == YUV_420 || cfColorFormat == YUV_422) ? 1 : pSC->m_param.cNumChannels;
1184
const size_t tScale = pSC->m_Dparam->cThumbnailScale;
1185
Int j = 0;
1186
1187
Int qp[MAX_CHANNELS], dcqp[MAX_CHANNELS], iStrength = (1 << pSC->WMII.cPostProcStrength);
1188
// ERR_CODE result = ICERR_OK;
1189
1190
#define mbX pSC->mbX
1191
#define mbY pSC->mbY
1192
#define tileX pSC->tileX
1193
#define tileY pSC->tileY
1194
#define bVertTileBoundary pSC->bVertTileBoundary
1195
#define bHoriTileBoundary pSC->bHoriTileBoundary
1196
#define bOneMBLeftVertTB pSC->bOneMBLeftVertTB
1197
#define bOneMBRightVertTB pSC->bOneMBRightVertTB
1198
#define iPredBefore pSC->iPredBefore
1199
#define iPredAfter pSC->iPredAfter
1200
1201
if (pSC->WMISCP.bUseHardTileBoundaries) {
1202
//Add tile location information
1203
if (pSC->cColumn == 0) {
1204
bVertTileBoundary = FALSE;
1205
tileY = 0;
1206
}
1207
bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;
1208
if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])
1209
bOneMBRightVertTB = TRUE;
1210
if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {
1211
bVertTileBoundary = TRUE;
1212
tileY++;
1213
}
1214
else
1215
bVertTileBoundary = FALSE;
1216
if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])
1217
bOneMBLeftVertTB = TRUE;
1218
1219
if (pSC->cRow == 0) {
1220
bHoriTileBoundary = FALSE;
1221
tileX = 0;
1222
}
1223
else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {
1224
bHoriTileBoundary = TRUE;
1225
tileX++;
1226
}
1227
else if(mbY != pSC->cRow)
1228
bHoriTileBoundary = FALSE;
1229
}
1230
else {
1231
bVertTileBoundary = FALSE;
1232
bHoriTileBoundary = FALSE;
1233
bOneMBLeftVertTB = FALSE;
1234
bOneMBRightVertTB = FALSE;
1235
}
1236
mbX = pSC->cColumn, mbY = pSC->cRow;
1237
1238
if(pSC->WMII.cPostProcStrength > 0){
1239
// threshold for post processing
1240
for(i = 0; i < iChannels; i ++){
1241
qp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerLP[i][pSC->MBInfo.iQIndexLP].iQP * iStrength * (olOverlap == OL_NONE ? 2 : 1);
1242
dcqp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerDC[i][0].iQP * iStrength;
1243
}
1244
1245
if(left) // a new MB row
1246
slideOneMBRow(pSC->pPostProcInfo, pSC->m_param.cNumChannels, mbWidth, top, bottom); // previous current row becomes previous row
1247
}
1248
1249
//================================================================
1250
// 400_Y, 444_YUV
1251
for (i = 0; i < iChannels && tScale < 16; ++i)
1252
{
1253
PixelI* const p0 = pSC->p0MBbuffer[i];
1254
PixelI* const p1 = pSC->p1MBbuffer[i];
1255
1256
//================================
1257
// second level inverse transform
1258
if (!bottomORright)
1259
{
1260
if(pSC->WMII.cPostProcStrength > 0)
1261
updatePostProcInfo(pSC->pPostProcInfo, p1, mbX, i); // update postproc info before IDCT
1262
1263
strIDCT4x4Stage2(p1);
1264
if (pSC->m_param.bScaledArith) {
1265
strNormalizeDec(p1, (i != 0));
1266
}
1267
}
1268
1269
//================================
1270
// second level inverse overlap
1271
if (OL_TWO == olOverlap)
1272
{
1273
/* Corner operations */
1274
if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
1275
strPost4_alternate(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);
1276
if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
1277
strPost4_alternate(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);
1278
if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
1279
strPost4_alternate(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);
1280
if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
1281
strPost4_alternate(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);
1282
if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))
1283
{
1284
if (left || bVertTileBoundary) {
1285
j = 0;
1286
strPost4_alternate(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
1287
strPost4_alternate(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
1288
}
1289
if (right || bVertTileBoundary) {
1290
j = -128;
1291
strPost4_alternate(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
1292
strPost4_alternate(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
1293
}
1294
}
1295
1296
if (!leftORright)
1297
{
1298
if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
1299
{
1300
if (top || bHoriTileBoundary) {
1301
p = p1;
1302
strPost4_alternate(p - 128, p - 64, p + 0, p + 64);
1303
strPost4_alternate(p - 112, p - 48, p + 16, p + 80);
1304
p = NULL;
1305
}
1306
if (bottom || bHoriTileBoundary) {
1307
p = p0 + 32;
1308
strPost4_alternate(p - 128, p - 64, p + 0, p + 64);
1309
strPost4_alternate(p - 112, p - 48, p + 16, p + 80);
1310
p = NULL;
1311
}
1312
}
1313
1314
if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)
1315
strPost4x4Stage2Split_alternate(p0, p1);
1316
}
1317
}
1318
1319
if(pSC->WMII.cPostProcStrength > 0)
1320
postProcMB(pSC->pPostProcInfo, p0, p1, mbX, i, dcqp[i]); // second stage deblocking
1321
1322
//================================
1323
// first level inverse transform
1324
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
1325
continue;
1326
1327
if (!top)
1328
{
1329
for (j = (left ? 32 : -96); j < (right ? 32 : 160); j += 64)
1330
{
1331
strIDCT4x4Stage1(p0 + j + 0);
1332
strIDCT4x4Stage1(p0 + j + 16);
1333
}
1334
}
1335
1336
if (!bottom)
1337
{
1338
for (j = (left ? 0 : -128); j < (right ? 0 : 128); j += 64)
1339
{
1340
// if(tScale == 2 && bdBitDepth != BD_1){
1341
// MIPgen(p1 + j + 0);
1342
// MIPgen(p1 + j + 16);
1343
// }
1344
strIDCT4x4Stage1(p1 + j + 0);
1345
strIDCT4x4Stage1(p1 + j + 16);
1346
}
1347
}
1348
1349
//================================
1350
// first level inverse overlap
1351
if (OL_NONE != olOverlap)
1352
{
1353
if (leftORright || bVertTileBoundary)
1354
{
1355
/* Corner operations */
1356
if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
1357
strPost4_alternate(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
1358
if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
1359
strPost4_alternate(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
1360
if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
1361
strPost4_alternate(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
1362
if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
1363
strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
1364
if (left || bVertTileBoundary) {
1365
j = 0 + 10;
1366
if (!top)
1367
{
1368
p = p0 + 16 + j;
1369
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1370
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1371
strPost4_alternate(p + 16, p + 14, p + 22, p + 24);
1372
strPost4_alternate(p + 17, p + 15, p + 23, p + 25);
1373
p = NULL;
1374
}
1375
if (!bottom)
1376
{
1377
p = p1 + j;
1378
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1379
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1380
p = NULL;
1381
}
1382
if (!topORbottom && !bHoriTileBoundary)
1383
{
1384
strPost4_alternate(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
1385
strPost4_alternate(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
1386
}
1387
}
1388
if (right || bVertTileBoundary) {
1389
j = -64 + 14;
1390
if (!top)
1391
{
1392
p = p0 + 16 + j;
1393
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1394
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1395
strPost4_alternate(p + 16, p + 14, p + 22, p + 24);
1396
strPost4_alternate(p + 17, p + 15, p + 23, p + 25);
1397
p = NULL;
1398
}
1399
if (!bottom)
1400
{
1401
p = p1 + j;
1402
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1403
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1404
p = NULL;
1405
}
1406
if (!topORbottom && !bHoriTileBoundary)
1407
{
1408
strPost4_alternate(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
1409
strPost4_alternate(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
1410
}
1411
}
1412
}
1413
1414
if (top || bHoriTileBoundary)
1415
{
1416
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
1417
{
1418
if (!bVertTileBoundary || j != -64) {
1419
p = p1 + j;
1420
strPost4_alternate(p + 5, p + 4, p + 64, p + 65);
1421
strPost4_alternate(p + 7, p + 6, p + 66, p + 67);
1422
p = NULL;
1423
1424
strPost4x4Stage1_alternate(p1 + j, 0);
1425
}
1426
}
1427
}
1428
1429
if (bottom || bHoriTileBoundary)
1430
{
1431
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
1432
{
1433
if (!bVertTileBoundary || j != -64) {
1434
strPost4x4Stage1_alternate(p0 + 16 + j, 0);
1435
strPost4x4Stage1_alternate(p0 + 32 + j, 0);
1436
1437
p = p0 + 48 + j;
1438
strPost4_alternate(p + 15, p + 14, p + 74, p + 75);
1439
strPost4_alternate(p + 13, p + 12, p + 72, p + 73);
1440
p = NULL;
1441
}
1442
}
1443
}
1444
1445
if (!top && !bottom && !bHoriTileBoundary)
1446
{
1447
for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
1448
{
1449
if (!bVertTileBoundary || j != -64) {
1450
strPost4x4Stage1_alternate(p0 + 16 + j, 0);
1451
strPost4x4Stage1_alternate(p0 + 32 + j, 0);
1452
strPost4x4Stage1Split_alternate(p0 + 48 + j, p1 + j, 0);
1453
strPost4x4Stage1_alternate(p1 + j, 0);
1454
}
1455
}
1456
}
1457
}
1458
1459
if(pSC->WMII.cPostProcStrength > 0 && (!topORleft))
1460
postProcBlock(pSC->pPostProcInfo, p0, p1, mbX, i, qp[i]); // destairing and first stage deblocking
1461
}
1462
1463
//================================================================
1464
// 420_UV
1465
for (i = 0; i < (YUV_420 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
1466
{
1467
PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
1468
PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
1469
1470
//========================================
1471
// second level inverse transform (420_UV)
1472
if (!bottomORright)
1473
{
1474
if (!pSC->m_param.bScaledArith) {
1475
strDCT2x2dn(p1, p1 + 32, p1 + 16, p1 + 48);
1476
}
1477
else {
1478
strDCT2x2dnDec(p1, p1 + 32, p1 + 16, p1 + 48);
1479
}
1480
}
1481
1482
//========================================
1483
// second level inverse overlap (420_UV)
1484
if (OL_TWO == olOverlap)
1485
{
1486
if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1487
COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));
1488
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1489
iPredBefore[i][0] = *(p1 + 0);
1490
if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1491
COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);
1492
if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1493
COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));
1494
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1495
iPredBefore[i][1] = *(p0 + 16);
1496
if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1497
COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);
1498
1499
if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)
1500
{
1501
if (left || bVertTileBoundary)
1502
strPost2_alternate(p0 + 0 + 16, p1 + 0);
1503
if (right || bVertTileBoundary)
1504
strPost2_alternate(p0 + -32 + 16, p1 + -32);
1505
}
1506
1507
if (!leftORright)
1508
{
1509
if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
1510
{
1511
if (top || bHoriTileBoundary)
1512
strPost2_alternate(p1 - 32, p1);
1513
if (bottom || bHoriTileBoundary)
1514
strPost2_alternate(p0 + 16 - 32, p0 + 16);
1515
}
1516
else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary) {
1517
strPost2x2_alternate(p0 - 16, p0 + 16, p1 - 32, p1);
1518
}
1519
}
1520
if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1521
COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));
1522
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1523
iPredAfter[i][0] = *(p1 + 0);
1524
if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1525
COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);
1526
if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1527
COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));
1528
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1529
iPredAfter[i][1] = *(p0 + 16);
1530
if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1531
COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);
1532
}
1533
1534
//========================================
1535
// first level inverse transform (420_UV)
1536
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
1537
continue;
1538
1539
if (!top)
1540
{
1541
// In order to allow correction operation of corner chroma overlap operators (fixed)
1542
// processing of left most MB column must be delayed by one MB
1543
// Thus left MB not processed until leftAdjacentColumn = 1
1544
for (j = ((left) ? 48 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -48 : -16)); j < ((right || bVertTileBoundary) ? 16 : 48); j += 32)
1545
{
1546
strIDCT4x4Stage1(p0 + j);
1547
}
1548
}
1549
1550
if (!bottom)
1551
{
1552
// In order to allow correction operation of corner chroma overlap operators (fixed)
1553
// processing of left most MB column must be delayed by one MB
1554
// Thus left MB not processed until leftAdjacentColumn = 1
1555
for (j = ((left) ? 32 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -64 : -32)); j < ((right || bVertTileBoundary) ? 0 : 32); j += 32)
1556
{
1557
strIDCT4x4Stage1(p1 + j);
1558
}
1559
}
1560
1561
//========================================
1562
// first level inverse overlap (420_UV)
1563
if (OL_NONE != olOverlap)
1564
{
1565
/* Corner operations */
1566
/* Change because the top-left corner ICT will not have happened until leftAdjacentColumn ==1 */
1567
if ((top || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
1568
strPost4_alternate(p1 - 64 + 0, p1 - 64 + 1, p1 - 64 + 2, p1 - 64 + 3);
1569
if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
1570
strPost4_alternate(p1 - 27, p1 - 28, p1 - 25, p1 - 26);
1571
/* Change because the bottom-left corner ICT will not have happened until leftAdjacentColumn ==1 */
1572
if ((bottom || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
1573
strPost4_alternate(p0 - 64 + 16 + 10, p0 - 64 + 16 + 11, p0 - 64 + 16 + 8, p0 - 64 + 16 + 9);
1574
if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
1575
strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
1576
if(!left && !top)
1577
{
1578
/* Change because the vertical 1-D overlap operations of the left edge pixels cannot be performed until leftAdjacentColumn ==1 */
1579
if (leftAdjacentColumn || bOneMBRightVertTB)
1580
{
1581
if (!bottom && !bHoriTileBoundary)
1582
{
1583
strPost4_alternate(p0 - 64 + 26, p0 - 64 + 24, p1 - 64 + 0, p1 - 64 + 2);
1584
strPost4_alternate(p0 - 64 + 27, p0 - 64 + 25, p1 - 64 + 1, p1 - 64 + 3);
1585
}
1586
1587
strPost4_alternate(p0 - 64 + 10, p0 - 64 + 8, p0 - 64 + 16, p0 - 64 + 18);
1588
strPost4_alternate(p0 - 64 + 11, p0 - 64 + 9, p0 - 64 + 17, p0 - 64 + 19);
1589
}
1590
if (bottom || bHoriTileBoundary)
1591
{
1592
p = p0 + -48;
1593
strPost4_alternate(p + 15, p + 14, p + 42, p + 43);
1594
strPost4_alternate(p + 13, p + 12, p + 40, p + 41);
1595
p = NULL;
1596
1597
if (!right && !bVertTileBoundary)
1598
{
1599
p = p0 + -16;
1600
strPost4_alternate(p + 15, p + 14, p + 42, p + 43);
1601
strPost4_alternate(p + 13, p + 12, p + 40, p + 41);
1602
p = NULL;
1603
}
1604
}
1605
else
1606
{
1607
strPost4x4Stage1Split_alternate(p0 + -48, p1 - 16 + -48, 32);
1608
1609
if (!right && !bVertTileBoundary)
1610
strPost4x4Stage1Split_alternate(p0 + -16, p1 - 16 + -16, 32);
1611
}
1612
1613
if (right || bVertTileBoundary)
1614
{
1615
if (!bottom && !bHoriTileBoundary)
1616
{
1617
strPost4_alternate(p0 - 2 , p0 - 4 , p1 - 28, p1 - 26);
1618
strPost4_alternate(p0 - 1 , p0 - 3 , p1 - 27, p1 - 25);
1619
}
1620
1621
strPost4_alternate(p0 - 18, p0 - 20, p0 - 12, p0 - 10);
1622
strPost4_alternate(p0 - 17, p0 - 19, p0 - 11, p0 - 9);
1623
}
1624
else
1625
{
1626
strPost4x4Stage1_alternate(p0 - 32, 32);
1627
}
1628
1629
strPost4x4Stage1_alternate(p0 - 64, 32);
1630
}
1631
1632
if (top || bHoriTileBoundary)
1633
{
1634
if (!left)
1635
{
1636
p = p1 + -64 + 4;
1637
strPost4_alternate(p + 1, p + 0, p + 28, p + 29);
1638
strPost4_alternate(p + 3, p + 2, p + 30, p + 31);
1639
p = NULL;
1640
}
1641
1642
if (!left && !right && !bVertTileBoundary)
1643
{
1644
p = p1 + -32 + 4;
1645
strPost4_alternate(p + 1, p + 0, p + 28, p + 29);
1646
strPost4_alternate(p + 3, p + 2, p + 30, p + 31);
1647
p = NULL;
1648
}
1649
}
1650
}
1651
}
1652
1653
//================================================================
1654
// 422_UV
1655
for (i = 0; i < (YUV_422 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
1656
{
1657
PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
1658
PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
1659
1660
//========================================
1661
// second level inverse transform (422_UV)
1662
if ((!bottomORright) && pSC->m_Dparam->cThumbnailScale < 16)
1663
{
1664
// 1D lossless HT
1665
p1[0] -= ((p1[32] + 1) >> 1);
1666
p1[32] += p1[0];
1667
1668
if (!pSC->m_param.bScaledArith) {
1669
strDCT2x2dn(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
1670
strDCT2x2dn(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
1671
}
1672
else {
1673
strDCT2x2dnDec(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
1674
strDCT2x2dnDec(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
1675
}
1676
}
1677
1678
//========================================
1679
// second level inverse overlap (422_UV)
1680
if (OL_TWO == olOverlap)
1681
{
1682
if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1683
COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));
1684
1685
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1686
iPredBefore[i][0] = *(p1 + 0);
1687
if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1688
COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);
1689
1690
if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1691
COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));
1692
1693
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1694
iPredBefore[i][1] = *(p0 + 48);
1695
if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1696
COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);
1697
1698
if (!bottom)
1699
{
1700
if (leftORright || bVertTileBoundary)
1701
{
1702
if (!top && !bHoriTileBoundary)
1703
{
1704
if (left || bVertTileBoundary)
1705
strPost2_alternate(p0 + 48 + 0, p1 + 0);
1706
1707
if (right || bVertTileBoundary)
1708
strPost2_alternate(p0 + 48 + -64, p1 + -64);
1709
}
1710
1711
if (left || bVertTileBoundary)
1712
strPost2_alternate(p1 + 16, p1 + 16 + 16);
1713
1714
if (right || bVertTileBoundary)
1715
strPost2_alternate(p1 + -48, p1 + -48 + 16);
1716
}
1717
1718
if (!leftORright && !bVertTileBoundary)
1719
{
1720
if (top || bHoriTileBoundary)
1721
strPost2_alternate(p1 - 64, p1);
1722
else
1723
strPost2x2_alternate(p0 - 16, p0 + 48, p1 - 64, p1);
1724
1725
strPost2x2_alternate(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
1726
}
1727
}
1728
1729
if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))
1730
strPost2_alternate(p0 - 16, p0 + 48);
1731
1732
if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1733
COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));
1734
1735
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1736
iPredAfter[i][0] = *(p1 + 0);
1737
if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1738
COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);
1739
1740
if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1741
COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));
1742
1743
if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1744
iPredAfter[i][1] = *(p0 + 48);
1745
if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1746
COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);
1747
}
1748
1749
//========================================
1750
// first level inverse transform (422_UV)
1751
if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
1752
continue;
1753
1754
if (!top)
1755
{
1756
// Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
1757
// Since 422 has no vertical downsampling, no top MB delay of processing is necessary
1758
for (j = (left ? 112 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -80 : -16)); j < ((right || bVertTileBoundary) ? 48 : 112); j += 64)
1759
{
1760
strIDCT4x4Stage1(p0 + j);
1761
}
1762
}
1763
1764
if (!bottom)
1765
{
1766
// Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
1767
// Since 422 has no vertical downsampling, no top MB delay of processing is necessary
1768
for (j = (left ? 64 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -128 : -64)); j < ((right || bVertTileBoundary) ? 0 : 64); j += 64)
1769
{
1770
strIDCT4x4Stage1(p1 + j + 0);
1771
strIDCT4x4Stage1(p1 + j + 16);
1772
strIDCT4x4Stage1(p1 + j + 32);
1773
}
1774
}
1775
1776
//========================================
1777
// first level inverse overlap (422_UV)
1778
if (OL_NONE != olOverlap)
1779
{
1780
/* Corner operations */
1781
if ((top || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
1782
strPost4_alternate(p1 - 128 + 0, p1 - 128 + 1, p1 - 128 + 2, p1 - 128 + 3);
1783
if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
1784
strPost4_alternate(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
1785
if ((bottom || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
1786
strPost4_alternate(p0 - 128 + 48 + 10, p0 - 128 + 48 + 11, p0 - 128 + 48 + 8, p0 - 128 + 48 + 9);
1787
if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
1788
strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
1789
if (!top)
1790
{
1791
// Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
1792
if (leftAdjacentColumn || bOneMBRightVertTB) {
1793
p = p0 + 32 + 10 - 128;
1794
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1795
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1796
p = NULL;
1797
}
1798
1799
if (right || bVertTileBoundary) {
1800
p = p0 + -32 + 14;
1801
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1802
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1803
p = NULL;
1804
}
1805
1806
for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
1807
strPost4x4Stage1_alternate(p0 + j + 32, 0);
1808
}
1809
1810
if (!bottom)
1811
{
1812
// Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
1813
if (leftAdjacentColumn || bOneMBRightVertTB)
1814
{
1815
p = p1 + 0 + 10 - 128;
1816
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1817
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1818
p += 16;
1819
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1820
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1821
p = NULL;
1822
}
1823
1824
if (right || bVertTileBoundary)
1825
{
1826
p = p1 + -64 + 14;
1827
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1828
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1829
p += 16;
1830
strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
1831
strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
1832
p = NULL;
1833
}
1834
1835
for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
1836
{
1837
strPost4x4Stage1_alternate(p1 + j + 0, 0);
1838
strPost4x4Stage1_alternate(p1 + j + 16, 0);
1839
}
1840
}
1841
1842
if (topORbottom || bHoriTileBoundary)
1843
{
1844
if (top || bHoriTileBoundary) {
1845
p = p1 + 5;
1846
for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
1847
{
1848
strPost4_alternate(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
1849
strPost4_alternate(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
1850
}
1851
p = NULL;
1852
}
1853
1854
if (bottom || bHoriTileBoundary) {
1855
p = p0 + 48 + 13;
1856
for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
1857
{
1858
strPost4_alternate(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
1859
strPost4_alternate(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
1860
}
1861
p = NULL;
1862
}
1863
}
1864
else
1865
{
1866
// Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
1867
if (leftAdjacentColumn || bOneMBRightVertTB)
1868
{
1869
j = 0 + 0 - 128;
1870
strPost4_alternate(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
1871
strPost4_alternate(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
1872
}
1873
1874
if (right || bVertTileBoundary)
1875
{
1876
j = -64 + 4;
1877
strPost4_alternate(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
1878
strPost4_alternate(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
1879
}
1880
1881
for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
1882
strPost4x4Stage1Split_alternate(p0 + j + 48, p1 + j + 0, 0);
1883
}
1884
}
1885
}
1886
1887
return ICERR_OK;
1888
}
1889
1890