Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/jxr/image/encode/strFwdTransform.c
4393 views
1
//*@@@+++@@@@******************************************************************
2
//
3
// Copyright © Microsoft Corp.
4
// All rights reserved.
5
//
6
// Redistribution and use in source and binary forms, with or without
7
// modification, are permitted provided that the following conditions are met:
8
//
9
// • Redistributions of source code must retain the above copyright notice,
10
// this list of conditions and the following disclaimer.
11
// • Redistributions in binary form must reproduce the above copyright notice,
12
// this list of conditions and the following disclaimer in the documentation
13
// and/or other materials provided with the distribution.
14
//
15
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25
// POSSIBILITY OF SUCH DAMAGE.
26
//
27
//*@@@---@@@@******************************************************************
28
29
#include "strTransform.h"
30
#include "encode.h"
31
32
/** rotation by pi/8 **/
/*
 * Two-step integer lifting on the pair (a, b): b is updated from a first, then
 * a is updated from the NEW b.  Both arguments must be modifiable lvalues and
 * are evaluated more than once, so they must be free of side effects.
 * The expansion is fully parenthesized so the macro behaves as one expression
 * (its value is the updated a) wherever it is used.
 */
#define ROTATE1(a, b) ((b) -= (((a) + 1) >> 1), (a) += (((b) + 1) >> 1)) // this works well too
#define ROTATE2(a, b) ((b) -= (((a)*3 + 4) >> 3), (a) += (((b)*3 + 4) >> 3)) // this works well too
35
36
/** local functions **/
37
static Void fwdOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);
38
static Void fwdOddOddPre(PixelI *, PixelI *, PixelI *, PixelI *);
39
static Void fwdOdd(PixelI *, PixelI *, PixelI *, PixelI *);
40
static Void strDCT2x2alt(PixelI * a, PixelI * b, PixelI * c, PixelI * d);
41
static Void strHSTenc1(PixelI *, PixelI *);
42
static Void strHSTenc(PixelI *, PixelI *, PixelI *, PixelI *);
43
static Void strHSTenc1_edge (PixelI *pa, PixelI *pd);
44
45
//static Void scaleDownUp0(PixelI *, PixelI *);
46
//static Void scaleDownUp1(PixelI *, PixelI *);
47
//static Void scaleDownUp2(PixelI *, PixelI *);
48
//#define FOURBUTTERFLY_ENC_ALT(p, i00, i01, i02, i03, i10, i11, i12, i13, \
49
// i20, i21, i22, i23, i30, i31, i32, i33) \
50
// strHSTenc(&p[i00], &p[i01], &p[i02], &p[i03]); \
51
// strHSTenc(&p[i10], &p[i11], &p[i12], &p[i13]); \
52
// strHSTenc(&p[i20], &p[i21], &p[i22], &p[i23]); \
53
// strHSTenc(&p[i30], &p[i31], &p[i32], &p[i33]); \
54
// strHSTenc1(&p[i00], &p[i03]); \
55
// strHSTenc1(&p[i10], &p[i13]); \
56
// strHSTenc1(&p[i20], &p[i23]); \
57
// strHSTenc1(&p[i30], &p[i33])
58
59
/** DCT stuff **/
60
/** data order before DCT **/
61
/** 0 1 2 3 **/
62
/** 4 5 6 7 **/
63
/** 8 9 10 11 **/
64
/** 12 13 14 15 **/
65
/** data order after DCT **/
66
/** 0 8 4 6 **/
67
/** 2 10 14 12 **/
68
/** 1 11 15 13 **/
69
/** 9 3 7 5 **/
70
/** reordering should be combined with zigzag scan **/
71
72
/*
 * First-stage forward transform of one 4x4 block, in place on p[0..15].
 *
 * A butterfly pass (FOURBUTTERFLY_HARDCODED1) is followed by one 2x2 stage per
 * group of four coefficients.  The three fwdOdd/fwdOddOdd calls touch disjoint
 * indices (4..7, 8..11, 12..15), so their relative order does not matter.
 */
Void strDCT4x4Stage1(PixelI * p)
{
    /** butterfly **/
    /* presumably equivalent to FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) -- confirm in strTransform.h */
    FOURBUTTERFLY_HARDCODED1(p);

    /** top left corner, butterfly => butterfly **/
    strDCT2x2up(p + 0, p + 1, p + 2, p + 3);

    /** top right corner, butterfly => pi/8 rotation **/
    fwdOdd(p + 5, p + 4, p + 7, p + 6);

    /** bottom left corner, pi/8 rotation => butterfly **/
    fwdOdd(p + 10, p + 8, p + 11, p + 9);

    /** bottom right corner, pi/8 rotation => pi/8 rotation **/
    fwdOddOdd(p + 15, p + 14, p + 13, p + 12);
}
90
91
/*
 * Second-stage forward transform, in place on the 16 coefficients found at
 * every 16th position of p (p[0], p[16], ..., p[240]).
 *
 * Same structure as strDCT4x4Stage1, with indices scaled to the stride of 16.
 * The fwdOdd/fwdOddOdd calls touch disjoint coefficients, so their relative
 * order does not matter.
 */
Void strDCT4x4SecondStage(PixelI * p)
{
    /** butterfly **/
    FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176,16, 208, 32, 224, 80, 144, 96, 160);

    /** top left corner, butterfly => butterfly **/
    strDCT2x2up(p + 0, p + 64, p + 16, p + 80);

    /** top right corner, butterfly => pi/8 rotation **/
    fwdOdd(p + 128, p + 192, p + 144, p + 208);

    /** bottom left corner, pi/8 rotation => butterfly **/
    fwdOdd(p + 32, p + 48, p + 96, p + 112);

    /** bottom right corner, pi/8 rotation => pi/8 rotation **/
    fwdOddOdd(p + 160, p + 224, p + 176, p + 240);
}
108
109
/*
 * Pre-scales a macroblock's stride-16 coefficients (p[0], p[16], ..., p[240]).
 *
 * p       - macroblock coefficient buffer, modified in place.
 * bChroma - when false the buffer is left untouched (the rounding step
 *           p[i] = (p[i] + 1) >> 2 that once applied here is disabled);
 *           when true each of the 16 coefficients is shifted right by one.
 */
Void strNormalizeEnc(PixelI* p, Bool bChroma)
{
    int iCoef;

    if (!bChroma) {
        return;     /* nothing to do for this plane */
    }

    for (iCoef = 0; iCoef < 256; iCoef += 16) {
        p[iCoef] >>= 1;
    }
}
123
124
/** 2x2 DCT with pre-scaling - for use on encoder side **/
125
Void strDCT2x2dnEnc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
126
{
127
PixelI a, b, c, d, C, t;
128
a = (*pa + 0) >> 1;
129
b = (*pb + 0) >> 1;
130
C = (*pc + 0) >> 1;
131
d = (*pd + 0) >> 1;
132
//PixelI t1, t2;
133
134
a += d;
135
b -= C;
136
t = ((a - b) >> 1);
137
c = t - d;
138
d = t - C;
139
a -= d;
140
b += c;
141
142
*pa = a;
143
*pb = b;
144
*pc = c;
145
*pd = d;
146
}
147
148
/** pre filter stuff **/
149
/** 2-point pre for boundaries **/
150
Void strPre2(PixelI * pa, PixelI * pb)
151
{
152
PixelI a, b;
153
a = *pa;
154
b = *pb;
155
156
/** rotate **/
157
b -= ((a + 2) >> 2);
158
a -= ((b + 1) >> 1);
159
160
a -= (b >> 5);
161
a -= (b >> 9);
162
a -= (b >> 13);
163
164
b -= ((a + 2) >> 2);
165
166
*pa = a;
167
*pb = b;
168
}
169
170
/*
 * 2x2 pre-filter, in place on (*pa, *pb, *pc, *pd).
 *
 * Structure: a butterfly on each of the pairs (a, d) and (b, c), the same
 * lifting sequence as strPre2 applied to (a, b), then the inverse butterflies.
 * The (a, d) and (b, c) butterflies do not share operands, so they are grouped
 * per pair below.
 */
Void strPre2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
{
    PixelI a = *pa;
    PixelI b = *pb;
    PixelI c = *pc;
    PixelI d = *pd;

    /** butterflies **/
    a += d;
    d -= (a + 1) >> 1;

    b += c;
    c -= (b + 1) >> 1;

    /** rotate **/
    b -= ((a + 2) >> 2);
    a -= ((b + 1) >> 1);
    a -= (b >> 5);
    a -= (b >> 9);
    a -= (b >> 13);
    b -= ((a + 2) >> 2);

    /** butterflies **/
    d += (a + 1) >> 1;
    a -= d;

    c += (b + 1) >> 1;
    b -= c;

    *pa = a;
    *pb = b;
    *pc = c;
    *pd = d;
}
203
204
/** 4-point pre for boundaries **/
205
Void strPre4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
206
{
207
PixelI a, b, c, d;
208
a = *pa;
209
b = *pb;
210
c = *pc;
211
d = *pd;
212
213
a += d, b += c;
214
d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
215
216
ROTATE1(c, d);
217
218
strHSTenc1_edge(&a, &d); strHSTenc1_edge(&b, &c);
219
220
d += ((a + 1) >> 1), c += ((b + 1) >> 1);
221
a -= d, b -= c;
222
223
*pa = a;
224
*pb = b;
225
*pc = c;
226
*pd = d;
227
}
228
229
/*****************************************************************************************
230
Input data offsets:
231
(15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)
232
(13)(12)|( 8+64)( 9+64) (13)(12)|(72)(73)
233
--------+-------------- --------+--------
234
( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)
235
( 7)( 6)|( 2+64) (3+64) ( 7)( 6)|(66)(67)
236
*****************************************************************************************/
237
/*
 * First-level 4x4 overlap pre-filter across the corner shared by four blocks.
 *
 * p0      - base of the upper block; coefficients p0[12..15] and
 *           p0[72 - iOffset .. 75 - iOffset] are filtered.
 * p1      - base of the lower block; coefficients p1[4..7] and
 *           p1[64 - iOffset .. 67 - iOffset] are filtered.
 * iOffset - subtracted from the offset of the right-hand blocks (callers in
 *           this file pass 0 or 32).
 *
 * The quadrant names below follow the layout diagram that precedes this
 * function (TL/TR belong to p0, BL/BR to p1).
 */
Void strPre4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset)
{
    PixelI *const pTL = p0 + 12;
    PixelI *const pTR = p0 + 72 - iOffset;
    PixelI *const pBL = p1 + 4;
    PixelI *const pBR = p1 + 64 - iOffset;
    int k;

    /** butterfly & scaling **/
    for (k = 0; k < 4; ++k) {
        strHSTenc(pTL + k, pTR + k, pBL + k, pBR + k);
    }
    for (k = 0; k < 4; ++k) {
        strHSTenc1(pTL + k, pBR + k);
    }

    /** anti diagonal corners: rotation by pi/8 **/
    ROTATE1(pBL[2], pBL[3]);
    ROTATE1(pBL[0], pBL[1]);
    ROTATE1(pTR[1], pTR[3]);
    ROTATE1(pTR[0], pTR[2]);

    /** bottom right corner: pi/8 rotation => pi/8 rotation **/
    fwdOddOddPre(pBR + 0, pBR + 1, pBR + 2, pBR + 3);

    /** butterfly **/
    for (k = 0; k < 4; ++k) {
        strDCT2x2dn(pTL + k, pTR + k, pBL + k, pBR + k);
    }
}
269
270
/*
 * Convenience form of strPre4x4Stage1Split for the case where the lower block
 * starts 16 coefficients after the upper one in the same buffer.
 */
Void strPre4x4Stage1(PixelI* p, Int iOffset)
{
    PixelI *const pLower = &p[16];

    strPre4x4Stage1Split(p, pLower, iOffset);
}
274
275
/*****************************************************************************************
276
Input data offsets:
277
(15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)
278
(13)(12)|( 8+32)( 9+32) (13)(12)|(40)(41)
279
--------+-------------- --------+--------
280
( 5)( 4)|( 0+32)( 1+32) p1 ( 5)( 4)|(32)(33)
281
( 7)( 6)|( 2+32)( 3+32) ( 7)( 6)|(34)(35)
282
*****************************************************************************************/
283
/*
 * Second-level 4x4 overlap pre-filter, in place on coefficients addressed
 * relative to p0 and p1.
 *
 * Each row of kOff names one group of four coefficients:
 *   { p0 offset (negative), p0 offset (positive),
 *     p1 offset (negative), p1 offset (positive) }.
 * The same four groups feed the opening Hadamard/scaling stage and the closing
 * 2x2 butterflies; only the argument order differs between the two.
 */
Void strPre4x4Stage2Split(PixelI* p0, PixelI* p1)
{
    static const int kOff[4][4] = {
        { -96,  96, -112, 80 },
        { -32,  32,  -48, 16 },
        { -80, 112, -128, 64 },
        { -16,  48,  -64,  0 }
    };
    int k;

    /** butterfly **/
    for (k = 0; k < 4; ++k) {
        strHSTenc(p0 + kOff[k][0], p0 + kOff[k][1], p1 + kOff[k][2], p1 + kOff[k][3]);
    }
    for (k = 0; k < 4; ++k) {
        strHSTenc1(p0 + kOff[k][0], p1 + kOff[k][3]);
    }

    /** anti diagonal corners: rotation **/
    ROTATE1(p1[-48], p1[-112]);
    ROTATE1(p1[-64], p1[-128]);
    ROTATE1(p0[112], p0[ 96]);
    ROTATE1(p0[ 48], p0[ 32]);

    /** bottom right corner: pi/8 rotation => pi/8 rotation **/
    fwdOddOddPre(p1 + 0, p1 + 64, p1 + 16, p1 + 80);

    /** butterfly **/
    for (k = 0; k < 4; ++k) {
        strDCT2x2dn(p0 + kOff[k][0], p1 + kOff[k][2], p0 + kOff[k][1], p1 + kOff[k][3]);
    }
}
310
311
312
/**
313
Hadamard+Scale transform
314
for some strange reason, breaking up the function into two blocks, strHSTenc1 and strHSTenc
315
seems to work faster
316
**/
317
/*
 * First half of the Hadamard + scale step, in place on four coefficients.
 * The second half is strHSTenc1, applied by the callers to (*pa, *pd).
 */
static Void strHSTenc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
{
    /** different realization : does rescaling as well! **/
    PixelI a = *pa;
    PixelI b = *pb;
    PixelI d = *pc;     /* c and d are loaded crosswise on purpose: d <- *pc, c <- *pd */
    PixelI c = *pd;

    a += c;
    b -= d;

    /* the next two updates read only a, b and their own operand, so they are independent */
    d += (b >> 1);
    c = ((a - b) >> 1) - c;

    b += c;
    a -= (d * 3 + 4) >> 3;

    *pa = a;
    *pb = b;
    *pc = c;
    *pd = d;
}
339
340
/*
 * Second half of the Hadamard + scale step: a lifting sequence on the pair
 * (*pa, *pd), in place.  Every step depends on the previous one, so the
 * statement order must not change.
 */
static Void strHSTenc1(PixelI *pa, PixelI *pd)
{
    /** different realization : does rescaling as well! **/
    PixelI lo = *pa;
    PixelI hi = *pd;

    /* small correction of hi by lo: -(lo >> 7) + (lo >> 10) */
    hi -= (lo >> 7);
    hi += (lo >> 10);

    hi -= (lo * 3) >> 4;
    lo -= (hi * 3) >> 3;
    hi = (lo >> 1) - hi;
    lo -= hi;

    *pa = lo;
    *pd = hi;
}
359
360
/*
 * 1-D (edge) variant of the scaling operator, in place on (*pa, *pd).
 *
 * A three-step prologue is specific to the 1-D case; the remainder is exactly
 * the lifting sequence of strHSTenc1, which is therefore reused on the local
 * copies before the results are stored.
 */
static Void strHSTenc1_edge (PixelI *pa, PixelI *pd)
{
    /** different realization as compared to scaling operator for 2D case **/
    PixelI a = *pa;
    PixelI d = -(*pd);  // Negative sign needed here for 1D scaling case to ensure correct scaling.

    a -= d;
    d += (a >> 1);
    a -= (d * 3 + 4) >> 3;
    // End new operations

    // Scaling modification of adding 7/1024 in two steps (without multiplication by 7),
    // followed by the common lifting steps -- all performed by strHSTenc1.
    strHSTenc1(&a, &d);

    *pa = a;
    *pd = d;
}
384
385
/** Kron(Rotate(pi/8), Rotate(pi/8)) **/
386
/*
 * In-place 2x2 stage on (*pa, *pb, *pc, *pd).  *pb and *pc enter with their
 * sign flipped.  The halves saved before the three-step rotation are applied
 * again, with opposite sign, after it.
 */
static Void fwdOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
{
    PixelI a = *pa;
    PixelI b = -*pb;
    PixelI c = -*pc;
    PixelI d = *pd;
    PixelI halfD, halfC;

    /** butterflies **/
    d += a;
    c -= b;
    halfD = d >> 1;
    halfC = c >> 1;
    a -= halfD;
    b += halfC;

    /** rotate pi/4 **/
    a += (b * 3 + 4) >> 3;
    b -= (a * 3 + 3) >> 2;
    a += (b * 3 + 3) >> 3;

    /** butterflies **/
    b -= halfC;
    a += halfD;
    c += b;
    d -= a;

    *pa = a;
    *pb = b;
    *pc = c;
    *pd = d;
}
417
/** Kron(Rotate(pi/8), Rotate(pi/8)) **/
418
static Void fwdOddOddPre(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
419
{
420
PixelI a, b, c, d, t1, t2;
421
a = *pa;
422
b = *pb;
423
c = *pc;
424
d = *pd;
425
426
/** butterflies **/
427
d += a;
428
c -= b;
429
a -= (t1 = d >> 1);
430
b += (t2 = c >> 1);
431
432
/** rotate pi/4 **/
433
a += (b * 3 + 4) >> 3;
434
b -= (a * 3 + 2) >> 2;
435
a += (b * 3 + 6) >> 3;
436
437
/** butterflies **/
438
b -= t2;
439
a += t1;
440
c += b;
441
d -= a;
442
443
*pa = a;
444
*pb = b;
445
*pc = c;
446
*pd = d;
447
}
448
449
/** Kron(Rotate(pi/8), [1 1; 1 -1]/sqrt(2)) **/
450
/** [a b c d] => [D C A B] **/
451
Void fwdOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
452
{
453
PixelI a, b, c, d;
454
a = *pa;
455
b = *pb;
456
c = *pc;
457
d = *pd;
458
459
/** butterflies **/
460
b -= c;
461
a += d;
462
c += (b + 1) >> 1;
463
d = ((a + 1) >> 1) - d;
464
465
/** rotate pi/8 **/
466
ROTATE2(a, b);
467
ROTATE2(c, d);
468
469
/** butterflies **/
470
d += (b) >> 1;
471
c -= (a + 1) >> 1;
472
b -= d;
473
a += c;
474
475
*pa = a;
476
*pb = b;
477
*pc = c;
478
*pd = d;
479
}
480
481
/*************************************************************************
482
Top-level function to transform possible part of a macroblock
483
*************************************************************************/
484
/*
 * Runs the forward overlap pre-filters and the two-level forward transform for
 * the macroblock position (pSC->cColumn, pSC->cRow).
 *
 * Work performed, in order:
 *   1. Tile-boundary bookkeeping: updates tileX/tileY and the boundary flags
 *      stored in pSC (only meaningful when bUseHardTileBoundaries is set;
 *      otherwise all four flags are cleared), then records the current
 *      position in pSC->mbX / pSC->mbY.
 *   2. For every full-resolution plane (one plane for YUV 4:2:0 / 4:2:2, else
 *      cNumChannels planes), then for the two 4:2:0 chroma planes, then for
 *      the two 4:2:2 chroma planes:
 *        a. first-level overlap  (skipped when olOverlap == OL_NONE),
 *        b. first-level 4x4 transforms,
 *        c. second-level overlap (only when olOverlap == OL_TWO),
 *        d. second-level transform.
 *
 * `right` / `bottom` are true when cColumn == cmbWidth / cRow == cmbHeight, so
 * the function is apparently also called one column / row past the last
 * macroblock -- TODO confirm against the caller.
 *
 * p0 / p1 come from pSC->p0MBbuffer / pSC->p1MBbuffer.  Judging from the
 * `!top` / `!bottom` guards they presumably address the upper and lower
 * macroblock rows respectively, and negative offsets reach into the
 * macroblock(s) to the left -- TODO confirm.  The second-level transform is
 * always applied at a negative offset from p0 (-256, -64 or -128) and only
 * when the position is neither in the first row nor in the first column.
 *
 * The scratch pointer `p` is reset to NULL after every use; the closing
 * assert checks that invariant.
 */
Void transformMacroblock(CWMImageStrCodec * pSC)
{
    OVERLAP olOverlap = pSC->WMISCP.olOverlap;
    COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
    Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
    Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
    Bool leftORright = (left || right), topORbottom = (top || bottom);
    Bool topORleft = (left || top);// rightORbottom = (right || bottom);
    Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);
    // Bool topAdjacentRow = (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);
    PixelI * p = NULL;// * pt = NULL;
    Int i, j;
    Int iNumChromaFullPlanes = (Int)((YUV_420 == cfColorFormat || YUV_422 == cfColorFormat) ?
        1 : pSC->m_param.cNumChannels);

    /* Short aliases for state kept in *pSC; removed again at the end of the function. */
#define mbX pSC->mbX
#define mbY pSC->mbY
#define tileX pSC->tileX
#define tileY pSC->tileY
#define bVertTileBoundary pSC->bVertTileBoundary
#define bHoriTileBoundary pSC->bHoriTileBoundary
#define bOneMBLeftVertTB pSC->bOneMBLeftVertTB
#define bOneMBRightVertTB pSC->bOneMBRightVertTB
#define iPredBefore pSC->iPredBefore
#define iPredAfter pSC->iPredAfter

    if (pSC->WMISCP.bUseHardTileBoundaries) {
        //Add tile location information
        if (pSC->cColumn == 0) {
            bVertTileBoundary = FALSE;
            tileY = 0;
        }
        bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;
        if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])
            bOneMBRightVertTB = TRUE;
        if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {
            bVertTileBoundary = TRUE;
            tileY++;
        }
        else
            bVertTileBoundary = FALSE;
        if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])
            bOneMBLeftVertTB = TRUE;

        /* mbY still holds the row of the previous call here, so the horizontal
           boundary state is only re-evaluated when the row has changed. */
        if (pSC->cRow == 0) {
            bHoriTileBoundary = FALSE;
            tileX = 0;
        }
        else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {
            bHoriTileBoundary = TRUE;
            tileX++;
        }
        else if(mbY != pSC->cRow)
            bHoriTileBoundary = FALSE;
    }
    else {
        bVertTileBoundary = FALSE;
        bHoriTileBoundary = FALSE;
        bOneMBLeftVertTB = FALSE;
        bOneMBRightVertTB = FALSE;
    }
    mbX = pSC->cColumn, mbY = pSC->cRow;

    //================================================================
    // 400_Y, 444_YUV
    for(i = 0; i < iNumChromaFullPlanes; ++i)
    {
        PixelI* const p0 = pSC->p0MBbuffer[i];//(0 == i ? pSC->pY0 : (1 == i ? pSC->pU0 : pSC->pV0));
        PixelI* const p1 = pSC->p1MBbuffer[i];//(0 == i ? pSC->pY1 : (1 == i ? pSC->pU1 : pSC->pV1));

        //================================
        // first level overlap
        if(OL_NONE != olOverlap)
        {
            /* Corner operations */
            if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
            if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
            if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
            if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
            if(!right && !bottom)
            {
                /* upper edge of this macroblock: 1-D filter on a boundary, 2-D filter otherwise */
                if (top || bHoriTileBoundary)
                {

                    for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
                    {
                        p = p1 + j;
                        strPre4(p + 5, p + 4, p + 64, p + 65);
                        strPre4(p + 7, p + 6, p + 66, p + 67);
                        p = NULL;
                    }
                }
                else
                {
                    for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
                    {
                        strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
                    }
                }

                /* left edge of this macroblock: 1-D filter on a boundary, 2-D filter otherwise */
                if (left || bVertTileBoundary)
                {
                    if (!top && !bHoriTileBoundary)
                    {
                        strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
                        strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
                    }

                    for (j = -64; j < -16; j += 16)
                    {
                        p = p1 + j;
                        strPre4(p + 74, p + 72, p + 80, p + 82);
                        strPre4(p + 75, p + 73, p + 81, p + 83);
                        p = NULL;
                    }
                }
                else
                {
                    for (j = -64; j < -16; j += 16)
                    {
                        strPre4x4Stage1(p1 + j, 0);
                    }
                }

                /* interior block corners */
                strPre4x4Stage1(p1 + 0, 0);
                strPre4x4Stage1(p1 + 16, 0);
                strPre4x4Stage1(p1 + 32, 0);
                strPre4x4Stage1(p1 + 64, 0);
                strPre4x4Stage1(p1 + 80, 0);
                strPre4x4Stage1(p1 + 96, 0);
                strPre4x4Stage1(p1 + 128, 0);
                strPre4x4Stage1(p1 + 144, 0);
                strPre4x4Stage1(p1 + 160, 0);
            }

            if (bottom || bHoriTileBoundary)
            {
                for (j = ((left || bVertTileBoundary) ? 48 : -16); j < (right ? -16 : 240); j += 64)
                {
                    p = p0 + j;
                    strPre4(p + 15, p + 14, p + 74, p + 75);
                    strPre4(p + 13, p + 12, p + 72, p + 73);
                    p = NULL;
                }
            }

            if ((right || bVertTileBoundary) && !bottom)
            {
                if (!top && !bHoriTileBoundary)
                {
                    strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
                    strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
                }
                for (j = -64; j < -16; j += 16)
                {
                    p = p1 + j;
                    strPre4(p + 15, p + 13, p + 21, p + 23);
                    strPre4(p + 14, p + 12, p + 20, p + 22);
                    p = NULL;
                }
            }
        }

        //================================
        // first level transform
        if (!top)
        {
            for (j = (left ? 48 : -16); j < (right ? 48 : 240); j += 64)
            {
                strDCT4x4Stage1(p0 + j);
            }
        }

        if (!bottom)
        {
            for (j = (left ? 0 : -64); j < (right ? 0 : 192); j += 64)
            {
                strDCT4x4Stage1(p1 + j + 0);
                strDCT4x4Stage1(p1 + j + 16);
                strDCT4x4Stage1(p1 + j + 32);
            }
        }

        //================================
        // second level overlap
        if (OL_TWO == olOverlap)
        {
            /* Corner operations */
            if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);
            if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);
            if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);
            if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);
            if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))
            {
                if (left || bVertTileBoundary) {
                    j = 0;
                    strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
                    strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
                }
                if (right || bVertTileBoundary) {
                    j = -128;
                    strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
                    strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
                }
            }

            if (!leftORright && !bVertTileBoundary)
            {
                if (topORbottom || bHoriTileBoundary)
                {
                    if (top || bHoriTileBoundary) {
                        p = p1;
                        strPre4(p - 128, p - 64, p + 0, p + 64);
                        strPre4(p - 112, p - 48, p + 16, p + 80);
                        p = NULL;
                    }
                    if (bottom || bHoriTileBoundary) {
                        p = p0 + 32;
                        strPre4(p - 128, p - 64, p + 0, p + 64);
                        strPre4(p - 112, p - 48, p + 16, p + 80);
                        p = NULL;
                    }
                }
                else
                {
                    strPre4x4Stage2Split(p0, p1);
                }
            }
        }

        //================================
        // second level transform
        if (!topORleft){
            if (pSC->m_param.bScaledArith) {
                /* bChroma is true for every plane except the first */
                strNormalizeEnc(p0 - 256, (i != 0));
            }
            strDCT4x4SecondStage(p0 - 256);
        }
    }

    //================================================================
    // 420_UV
    for(i = 0; i < (YUV_420 == cfColorFormat? 2 : 0); ++i)
    {
        PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
        PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);

        //================================
        // first level overlap (420_UV)
        if (OL_NONE != olOverlap)
        {
            /* Corner operations */
            if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
            if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p1 - 27, p1 - 28, p1 - 25, p1 - 26);
            if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p0 + 16 + 10, p0 + 16 + 11, p0 + 16 + 8, p0 + 16 + 9);
            if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
            if(!right && !bottom)
            {
                if (top || bHoriTileBoundary)
                {

                    for (j = ((left || bVertTileBoundary) ? 0 : -32); j < 32; j += 32)
                    {
                        p = p1 + j;
                        strPre4(p + 5, p + 4, p + 32, p + 33);
                        strPre4(p + 7, p + 6, p + 34, p + 35);
                        p = NULL;
                    }
                }
                else
                {
                    for (j = ((left || bVertTileBoundary) ? 0: -32); j < 32; j += 32)
                    {
                        strPre4x4Stage1Split(p0 + 16 + j, p1 + j, 32);
                    }
                }

                if (left || bVertTileBoundary)
                {
                    if (!top && !bHoriTileBoundary)
                    {
                        strPre4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);
                        strPre4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);
                    }

                    strPre4(p1 + 10, p1 + 8, p1 + 16, p1 + 18);
                    strPre4(p1 + 11, p1 + 9, p1 + 17, p1 + 19);
                }
                else if (!bVertTileBoundary)
                {
                    strPre4x4Stage1(p1 - 32, 32);
                }

                strPre4x4Stage1(p1, 32);
            }

            if (bottom || bHoriTileBoundary)
            {
                for (j = ((left || bVertTileBoundary) ? 16: -16); j < (right ? -16: 32); j += 32)
                {
                    p = p0 + j;
                    strPre4(p + 15, p + 14, p + 42, p + 43);
                    strPre4(p + 13, p + 12, p + 40, p + 41);
                    p = NULL;
                }
            }

            if ((right || bVertTileBoundary) && !bottom)
            {
                if (!top && !bHoriTileBoundary)
                {
                    strPre4(p0 - 1, p0 - 3, p1 - 27, p1 - 25);
                    strPre4(p0 - 2, p0 - 4, p1 - 28, p1 - 26);
                }

                strPre4(p1 - 17, p1 - 19, p1 - 11, p1 - 9);
                strPre4(p1 - 18, p1 - 20, p1 - 12, p1 - 10);
            }
        }

        //================================
        // first level transform (420_UV)
        if (!top)
        {
            for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)
            {
                strDCT4x4Stage1(p0 + j);
            }
        }

        if (!bottom)
        {
            for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)
            {
                strDCT4x4Stage1(p1 + j);
            }
        }

        //================================
        // second level overlap (420_UV)
        if (OL_TWO == olOverlap)
        {
            /* corner handling before the filters; COMPUTE_CORNER_PRED_DIFF is defined outside this file */
            if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
                iPredBefore[i][0] = *(p1 + 0);
            if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);

            if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
                iPredBefore[i][1] = *(p0 + 16);
            if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);

            if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)
            {
                if (left || bVertTileBoundary)
                    strPre2(p0 + 0 + 16, p1 + 0);
                if (right || bVertTileBoundary)
                    strPre2(p0 + -32 + 16, p1 + -32);
            }

            if (!leftORright)
            {
                if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
                {
                    if (top || bHoriTileBoundary)
                        strPre2(p1 - 32, p1);
                    if (bottom || bHoriTileBoundary)
                        strPre2(p0 + 16 - 32, p0 + 16);
                }
                else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)
                    strPre2x2(p0 - 16, p0 + 16, p1 - 32, p1);
            }
            /* corner handling after the filters, mirroring the DIFF steps above */
            if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));
            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
                iPredAfter[i][0] = *(p1 + 0);
            if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);
            if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));
            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
                iPredAfter[i][1] = *(p0 + 16);
            if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);
        }

        //================================
        // second level transform (420_UV)
        if (!topORleft)
        {
            if (!pSC->m_param.bScaledArith) {
                strDCT2x2dn(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
            }
            else {
                strDCT2x2dnEnc(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
            }
        }
    }

    //================================================================
    // 422_UV
    for(i = 0; i < (YUV_422 == cfColorFormat? 2 : 0); ++i)
    {
        PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
        PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);

        //================================
        // first level overlap (422_UV)
        if (OL_NONE != olOverlap)
        {
            /* Corner operations */
            if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
            if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
            if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
                strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
            if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
                strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
            if(!right && !bottom)
            {
                if (top || bHoriTileBoundary)
                {

                    for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 64; j += 64)
                    {
                        p = p1 + j;
                        strPre4(p + 5, p + 4, p + 64, p + 65);
                        strPre4(p + 7, p + 6, p + 66, p + 67);
                        p = NULL;
                    }
                }
                else
                {
                    for (j = ((left || bVertTileBoundary) ? 0: -64); j < 64; j += 64)
                    {
                        strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
                    }
                }

                if (left || bVertTileBoundary)
                {
                    if (!top && !bHoriTileBoundary)
                    {
                        strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
                        strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
                    }

                    for (j = 0; j < 48; j += 16)
                    {
                        p = p1 + j;
                        strPre4(p + 10, p + 8, p + 16, p + 18);
                        strPre4(p + 11, p + 9, p + 17, p + 19);
                        p = NULL;
                    }
                }
                else if (!bVertTileBoundary)
                {
                    for (j = -64; j < -16; j += 16)
                    {
                        strPre4x4Stage1(p1 + j, 0);
                    }
                }

                strPre4x4Stage1(p1 + 0, 0);
                strPre4x4Stage1(p1 + 16, 0);
                strPre4x4Stage1(p1 + 32, 0);
            }

            if (bottom || bHoriTileBoundary)
            {
                for (j = ((left || bVertTileBoundary) ? 48: -16); j < (right ? -16: 112); j += 64)
                {
                    p = p0 + j;
                    strPre4(p + 15, p + 14, p + 74, p + 75);
                    strPre4(p + 13, p + 12, p + 72, p + 73);
                    p = NULL;
                }
            }

            if ((right || bVertTileBoundary) && !bottom)
            {
                if (!top && !bHoriTileBoundary)
                {
                    strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
                    strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
                }

                for (j = -64; j < -16; j += 16)
                {
                    p = p1 + j;
                    strPre4(p + 15, p + 13, p + 21, p + 23);
                    strPre4(p + 14, p + 12, p + 20, p + 22);
                    p = NULL;
                }
            }
        }

        //================================
        // first level transform (422_UV)
        if (!top)
        {
            for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)
            {
                strDCT4x4Stage1(p0 + j);
            }
        }

        if (!bottom)
        {
            for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)
            {
                strDCT4x4Stage1(p1 + j + 0);
                strDCT4x4Stage1(p1 + j + 16);
                strDCT4x4Stage1(p1 + j + 32);
            }
        }

        //================================
        // second level overlap (422_UV)
        if (OL_TWO == olOverlap)
        {
            /* corner handling before the filters; COMPUTE_CORNER_PRED_DIFF is defined outside this file */
            if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
                iPredBefore[i][0] = *(p1 + 0);
            if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);

            if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
                iPredBefore[i][1] = *(p0 + 48);
            if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);

            if (!bottom)
            {
                if (leftORright || bVertTileBoundary)
                {
                    if (!top && !bHoriTileBoundary)
                    {
                        if (left || bVertTileBoundary)
                            strPre2(p0 + 48 + 0, p1 + 0);

                        if (right || bVertTileBoundary)
                            strPre2(p0 + 48 + -64, p1 + -64);
                    }

                    if (left || bVertTileBoundary)
                        strPre2(p1 + 16, p1 + 16 + 16);

                    if (right || bVertTileBoundary)
                        strPre2(p1 + -48, p1 + -48 + 16);
                }

                if (!leftORright && !bVertTileBoundary)
                {
                    if (top || bHoriTileBoundary)
                        strPre2(p1 - 64, p1);
                    else
                        strPre2x2(p0 - 16, p0 + 48, p1 - 64, p1);

                    strPre2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
                }
            }

            if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))
                strPre2(p0 - 16, p0 + 48);

            /* corner handling after the filters, mirroring the DIFF steps above */
            if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
                iPredAfter[i][0] = *(p1 + 0);
            if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);

            if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));

            if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
                iPredAfter[i][1] = *(p0 + 48);
            if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
                COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);
        }

        //================================
        // second level transform (422_UV)
        if (!topORleft)
        {
            if (!pSC->m_param.bScaledArith) {
                strDCT2x2dn(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
                strDCT2x2dn(p0 - 96, p0 - 32, p0 - 80, p0 - 16);
            }
            else {
                strDCT2x2dnEnc(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
                strDCT2x2dnEnc(p0 - 96, p0 - 32, p0 - 80, p0 - 16);
            }

            // 1D lossless HT
            p0[- 96] -= p0[-128];
            p0[-128] += ((p0[-96] + 1) >> 1);
        }
    }
    assert(NULL == p);

    /* Drop the function-local aliases so they cannot capture identifiers in later code. */
#undef mbX
#undef mbY
#undef tileX
#undef tileY
#undef bVertTileBoundary
#undef bHoriTileBoundary
#undef bOneMBLeftVertTB
#undef bOneMBRightVertTB
#undef iPredBefore
#undef iPredAfter
}
1111
1112
1113