// Path: blob/master/libs/jxr/image/encode/strFwdTransform.c
// 4393 views
//*@@@+++@@@@******************************************************************1//2// Copyright © Microsoft Corp.3// All rights reserved.4//5// Redistribution and use in source and binary forms, with or without6// modification, are permitted provided that the following conditions are met:7//8// • Redistributions of source code must retain the above copyright notice,9// this list of conditions and the following disclaimer.10// • Redistributions in binary form must reproduce the above copyright notice,11// this list of conditions and the following disclaimer in the documentation12// and/or other materials provided with the distribution.13//14// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"15// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE16// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE17// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE18// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR19// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF20// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS21// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)23// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE24// POSSIBILITY OF SUCH DAMAGE.25//26//*@@@---@@@@******************************************************************2728#include "strTransform.h"29#include "encode.h"3031/** rotation by pi/8 **/32#define ROTATE1(a, b) (b) -= (((a) + 1) >> 1), (a) += (((b) + 1) >> 1) // this works well too33#define ROTATE2(a, b) (b) -= (((a)*3 + 4) >> 3), (a) += (((b)*3 + 4) >> 3) // this works well too3435/** local functions **/36static Void fwdOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);37static Void fwdOddOddPre(PixelI *, PixelI *, PixelI *, PixelI *);38static Void fwdOdd(PixelI *, PixelI 
*, PixelI *, PixelI *);39static Void strDCT2x2alt(PixelI * a, PixelI * b, PixelI * c, PixelI * d);40static Void strHSTenc1(PixelI *, PixelI *);41static Void strHSTenc(PixelI *, PixelI *, PixelI *, PixelI *);42static Void strHSTenc1_edge (PixelI *pa, PixelI *pd);4344//static Void scaleDownUp0(PixelI *, PixelI *);45//static Void scaleDownUp1(PixelI *, PixelI *);46//static Void scaleDownUp2(PixelI *, PixelI *);47//#define FOURBUTTERFLY_ENC_ALT(p, i00, i01, i02, i03, i10, i11, i12, i13, \48// i20, i21, i22, i23, i30, i31, i32, i33) \49// strHSTenc(&p[i00], &p[i01], &p[i02], &p[i03]); \50// strHSTenc(&p[i10], &p[i11], &p[i12], &p[i13]); \51// strHSTenc(&p[i20], &p[i21], &p[i22], &p[i23]); \52// strHSTenc(&p[i30], &p[i31], &p[i32], &p[i33]); \53// strHSTenc1(&p[i00], &p[i03]); \54// strHSTenc1(&p[i10], &p[i13]); \55// strHSTenc1(&p[i20], &p[i23]); \56// strHSTenc1(&p[i30], &p[i33])5758/** DCT stuff **/59/** data order before DCT **/60/** 0 1 2 3 **/61/** 4 5 6 7 **/62/** 8 9 10 11 **/63/** 12 13 14 15 **/64/** data order after DCT **/65/** 0 8 4 6 **/66/** 2 10 14 12 **/67/** 1 11 15 13 **/68/** 9 3 7 5 **/69/** reordering should be combined with zigzag scan **/7071Void strDCT4x4Stage1(PixelI * p)72{73/** butterfly **/74//FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);75FOURBUTTERFLY_HARDCODED1(p);7677/** top left corner, butterfly => butterfly **/78strDCT2x2up(&p[0], &p[1], &p[2], &p[3]);7980/** bottom right corner, pi/8 rotation => pi/8 rotation **/81fwdOddOdd(&p[15], &p[14], &p[13], &p[12]);8283/** top right corner, butterfly => pi/8 rotation **/84fwdOdd(&p[5], &p[4], &p[7], &p[6]);8586/** bottom left corner, pi/8 rotation => butterfly **/87fwdOdd(&p[10], &p[8], &p[11], &p[9]);88}8990Void strDCT4x4SecondStage(PixelI * p)91{92/** butterfly **/93FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176,16, 208, 32, 224, 80, 144, 96, 160);9495/** top left corner, butterfly => butterfly **/96strDCT2x2up(&p[0], &p[64], &p[16], &p[80]);9798/** bottom right 
corner, pi/8 rotation => pi/8 rotation **/99fwdOddOdd(&p[160], &p[224], &p[176], &p[240]);100101/** top right corner, butterfly => pi/8 rotation **/102fwdOdd(&p[128], &p[192], &p[144], &p[208]);103104/** bottom left corner, pi/8 rotation => butterfly **/105fwdOdd(&p[32], &p[48], &p[96], &p[112]);106}107108Void strNormalizeEnc(PixelI* p, Bool bChroma)109{110int i;111if (!bChroma) {112//for (i = 0; i < 256; i += 16) {113// p[i] = (p[i] + 1) >> 2;114//}115}116else {117for (i = 0; i < 256; i += 16) {118p[i] >>= 1;119}120}121}122123/** 2x2 DCT with pre-scaling - for use on encoder side **/124Void strDCT2x2dnEnc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)125{126PixelI a, b, c, d, C, t;127a = (*pa + 0) >> 1;128b = (*pb + 0) >> 1;129C = (*pc + 0) >> 1;130d = (*pd + 0) >> 1;131//PixelI t1, t2;132133a += d;134b -= C;135t = ((a - b) >> 1);136c = t - d;137d = t - C;138a -= d;139b += c;140141*pa = a;142*pb = b;143*pc = c;144*pd = d;145}146147/** pre filter stuff **/148/** 2-point pre for boundaries **/149Void strPre2(PixelI * pa, PixelI * pb)150{151PixelI a, b;152a = *pa;153b = *pb;154155/** rotate **/156b -= ((a + 2) >> 2);157a -= ((b + 1) >> 1);158159a -= (b >> 5);160a -= (b >> 9);161a -= (b >> 13);162163b -= ((a + 2) >> 2);164165*pa = a;166*pb = b;167}168169Void strPre2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)170{171PixelI a, b, c, d;172a = *pa;173b = *pb;174c = *pc;175d = *pd;176177/** butterflies **/178a += d;179b += c;180d -= (a + 1) >> 1;181c -= (b + 1) >> 1;182183/** rotate **/184b -= ((a + 2) >> 2);185a -= ((b + 1) >> 1);186a -= (b >> 5);187a -= (b >> 9);188a -= (b >> 13);189b -= ((a + 2) >> 2);190191/** butterflies **/192d += (a + 1) >> 1;193c += (b + 1) >> 1;194a -= d;195b -= c;196197*pa = a;198*pb = b;199*pc = c;200*pd = d;201}202203/** 4-point pre for boundaries **/204Void strPre4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)205{206PixelI a, b, c, d;207a = *pa;208b = *pb;209c = *pc;210d = *pd;211212a += d, b += c;213d -= ((a + 1) >> 1), c -= ((b + 1) 
>> 1);214215ROTATE1(c, d);216217strHSTenc1_edge(&a, &d); strHSTenc1_edge(&b, &c);218219d += ((a + 1) >> 1), c += ((b + 1) >> 1);220a -= d, b -= c;221222*pa = a;223*pb = b;224*pc = c;225*pd = d;226}227228/*****************************************************************************************229Input data offsets:230(15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)231(13)(12)|( 8+64)( 9+64) (13)(12)|(72)(73)232--------+-------------- --------+--------233( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)234( 7)( 6)|( 2+64) (3+64) ( 7)( 6)|(66)(67)235*****************************************************************************************/236Void strPre4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset)237{238PixelI *p2 = p0 + 72 - iOffset;239PixelI *p3 = p1 + 64 - iOffset;240p0 += 12;241p1 += 4;242243/** butterfly & scaling **/244strHSTenc(p0 + 0, p2 + 0, p1 + 0, p3 + 0);245strHSTenc(p0 + 1, p2 + 1, p1 + 1, p3 + 1);246strHSTenc(p0 + 2, p2 + 2, p1 + 2, p3 + 2);247strHSTenc(p0 + 3, p2 + 3, p1 + 3, p3 + 3);248strHSTenc1(p0 + 0, p3 + 0);249strHSTenc1(p0 + 1, p3 + 1);250strHSTenc1(p0 + 2, p3 + 2);251strHSTenc1(p0 + 3, p3 + 3);252253/** anti diagonal corners: rotation by pi/8 **/254ROTATE1(p1[2], p1[3]);255ROTATE1(p1[0], p1[1]);256ROTATE1(p2[1], p2[3]);257ROTATE1(p2[0], p2[2]);258259/** bottom right corner: pi/8 rotation => pi/8 rotation **/260fwdOddOddPre(p3 + 0, p3 + 1, p3 + 2, p3 + 3);261262/** butterfly **/263strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);264strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);265strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);266strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);267}268269Void strPre4x4Stage1(PixelI* p, Int iOffset)270{271strPre4x4Stage1Split(p, p + 16, iOffset);272}273274/*****************************************************************************************275Input data offsets:276(15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)277(13)(12)|( 8+32)( 9+32) (13)(12)|(40)(41)278--------+-------------- --------+--------279( 5)( 4)|( 0+32)( 1+32) p1 ( 5)( 
4)|(32)(33)280( 7)( 6)|( 2+32)( 3+32) ( 7)( 6)|(34)(35)281*****************************************************************************************/282Void strPre4x4Stage2Split(PixelI* p0, PixelI* p1)283{284/** butterfly **/285strHSTenc(p0 - 96, p0 + 96, p1 - 112, p1 + 80);286strHSTenc(p0 - 32, p0 + 32, p1 - 48, p1 + 16);287strHSTenc(p0 - 80, p0 + 112, p1 - 128, p1 + 64);288strHSTenc(p0 - 16, p0 + 48, p1 - 64, p1 + 0);289strHSTenc1(p0 - 96, p1 + 80);290strHSTenc1(p0 - 32, p1 + 16);291strHSTenc1(p0 - 80, p1 + 64);292strHSTenc1(p0 - 16, p1 + 0);293294/** anti diagonal corners: rotation **/295ROTATE1(p1[-48], p1[-112]);296ROTATE1(p1[-64], p1[-128]);297ROTATE1(p0[112], p0[ 96]);298ROTATE1(p0[ 48], p0[ 32]);299300/** bottom right corner: pi/8 rotation => pi/8 rotation **/301fwdOddOddPre(p1 + 0, p1 + 64, p1 + 16, p1 + 80);302303/** butterfly **/304strDCT2x2dn(p0 - 96, p1 - 112, p0 + 96, p1 + 80);305strDCT2x2dn(p0 - 32, p1 - 48, p0 + 32, p1 + 16);306strDCT2x2dn(p0 - 80, p1 - 128, p0 + 112, p1 + 64);307strDCT2x2dn(p0 - 16, p1 - 64, p0 + 48, p1 + 0);308}309310311/**312Hadamard+Scale transform313for some strange reason, breaking up the function into two blocks, strHSTenc1 and strHSTenc314seems to work faster315**/316static Void strHSTenc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)317{318/** different realization : does rescaling as well! **/319PixelI a, b, c, d;320a = *pa;321b = *pb;322d = *pc;323c = *pd;324325a += c;326b -= d;327c = ((a - b) >> 1) - c;328d += (b >> 1);329b += c;330331a -= (d * 3 + 4) >> 3;332333*pa = a;334*pb = b;335*pc = c;336*pd = d;337}338339static Void strHSTenc1(PixelI *pa, PixelI *pd)340{341/** different realization : does rescaling as well! 
**/342PixelI a, d;343a = *pa;344d = *pd;345346d -= (a >> 7);347d += (a >> 10);348349//a -= (d * 3 + 4) >> 3;350d -= (a * 3 + 0) >> 4;351a -= (d * 3 + 0) >> 3;352d = (a >> 1) - d;353a -= d;354355*pa = a;356*pd = d;357}358359static Void strHSTenc1_edge (PixelI *pa, PixelI *pd)360{361/** different realizion as compared to scaling operator for 2D case **/362PixelI a, d;363a = *pa;364d = -(*pd); // Negative sign needed here for 1D scaling case to ensure correct scaling.365366a -= d;367d += (a >> 1);368a -= (d * 3 + 4) >> 3;369// End new operations370371//Scaling modification of adding 7/1024 in two steps (without multiplication by 7).372d -= (a >> 7);373d += (a >> 10);374375d -= (a * 3 + 0) >> 4;376a -= (d * 3 + 0) >> 3;377d = (a >> 1) - d;378a -= d;379380*pa = a;381*pd = d;382}383384/** Kron(Rotate(pi/8), Rotate(pi/8)) **/\385static Void fwdOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)386{387PixelI a, b, c, d, t1, t2;388389a = *pa;390b = -*pb;391c = -*pc;392d = *pd;393394/** butterflies **/395d += a;396c -= b;397a -= (t1 = d >> 1);398b += (t2 = c >> 1);399400/** rotate pi/4 **/401a += (b * 3 + 4) >> 3;402b -= (a * 3 + 3) >> 2;403a += (b * 3 + 3) >> 3;404405/** butterflies **/406b -= t2;407a += t1;408c += b;409d -= a;410411*pa = a;412*pb = b;413*pc = c;414*pd = d;415}416/** Kron(Rotate(pi/8), Rotate(pi/8)) **/417static Void fwdOddOddPre(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)418{419PixelI a, b, c, d, t1, t2;420a = *pa;421b = *pb;422c = *pc;423d = *pd;424425/** butterflies **/426d += a;427c -= b;428a -= (t1 = d >> 1);429b += (t2 = c >> 1);430431/** rotate pi/4 **/432a += (b * 3 + 4) >> 3;433b -= (a * 3 + 2) >> 2;434a += (b * 3 + 6) >> 3;435436/** butterflies **/437b -= t2;438a += t1;439c += b;440d -= a;441442*pa = a;443*pb = b;444*pc = c;445*pd = d;446}447448/** Kron(Rotate(pi/8), [1 1; 1 -1]/sqrt(2)) **/449/** [a b c d] => [D C A B] **/450Void fwdOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)451{452PixelI a, b, c, d;453a = *pa;454b = *pb;455c = 
*pc;456d = *pd;457458/** butterflies **/459b -= c;460a += d;461c += (b + 1) >> 1;462d = ((a + 1) >> 1) - d;463464/** rotate pi/8 **/465ROTATE2(a, b);466ROTATE2(c, d);467468/** butterflies **/469d += (b) >> 1;470c -= (a + 1) >> 1;471b -= d;472a += c;473474*pa = a;475*pb = b;476*pc = c;477*pd = d;478}479480/*************************************************************************481Top-level function to tranform possible part of a macroblock482*************************************************************************/483Void transformMacroblock(CWMImageStrCodec * pSC)484{485OVERLAP olOverlap = pSC->WMISCP.olOverlap;486COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;487Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);488Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);489Bool leftORright = (left || right), topORbottom = (top || bottom);490Bool topORleft = (left || top);// rightORbottom = (right || bottom);491Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);492// Bool topAdjacentRow = (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);493PixelI * p = NULL;// * pt = NULL;494Int i, j;495Int iNumChromaFullPlanes = (Int)((YUV_420 == cfColorFormat || YUV_422 == cfColorFormat) ?4961 : pSC->m_param.cNumChannels);497498#define mbX pSC->mbX499#define mbY pSC->mbY500#define tileX pSC->tileX501#define tileY pSC->tileY502#define bVertTileBoundary pSC->bVertTileBoundary503#define bHoriTileBoundary pSC->bHoriTileBoundary504#define bOneMBLeftVertTB pSC->bOneMBLeftVertTB505#define bOneMBRightVertTB pSC->bOneMBRightVertTB506#define iPredBefore pSC->iPredBefore507#define iPredAfter pSC->iPredAfter508509if (pSC->WMISCP.bUseHardTileBoundaries) {510//Add tile location information511if (pSC->cColumn == 0) {512bVertTileBoundary = FALSE;513tileY = 0;514}515bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;516if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && 
(pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])517bOneMBRightVertTB = TRUE;518if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {519bVertTileBoundary = TRUE;520tileY++;521}522else523bVertTileBoundary = FALSE;524if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])525bOneMBLeftVertTB = TRUE;526527if (pSC->cRow == 0) {528bHoriTileBoundary = FALSE;529tileX = 0;530}531else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {532bHoriTileBoundary = TRUE;533tileX++;534}535else if(mbY != pSC->cRow)536bHoriTileBoundary = FALSE;537}538else {539bVertTileBoundary = FALSE;540bHoriTileBoundary = FALSE;541bOneMBLeftVertTB = FALSE;542bOneMBRightVertTB = FALSE;543}544mbX = pSC->cColumn, mbY = pSC->cRow;545546//================================================================547// 400_Y, 444_YUV548for(i = 0; i < iNumChromaFullPlanes; ++i)549{550PixelI* const p0 = pSC->p0MBbuffer[i];//(0 == i ? pSC->pY0 : (1 == i ? pSC->pU0 : pSC->pV0));551PixelI* const p1 = pSC->p1MBbuffer[i];//(0 == i ? pSC->pY1 : (1 == i ? pSC->pU1 : pSC->pV1));552553//================================554// first level overlap555if(OL_NONE != olOverlap)556{557/* Corner operations */558if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))559strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);560if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))561strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);562if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))563strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);564if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))565strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);566if(!right && !bottom)567{568if (top || bHoriTileBoundary)569{570571for (j = ((left || bVertTileBoundary) ? 
0 : -64); j < 192; j += 64)572{573p = p1 + j;574strPre4(p + 5, p + 4, p + 64, p + 65);575strPre4(p + 7, p + 6, p + 66, p + 67);576p = NULL;577}578}579else580{581for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)582{583strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);584}585}586587if (left || bVertTileBoundary)588{589if (!top && !bHoriTileBoundary)590{591strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);592strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);593}594595for (j = -64; j < -16; j += 16)596{597p = p1 + j;598strPre4(p + 74, p + 72, p + 80, p + 82);599strPre4(p + 75, p + 73, p + 81, p + 83);600p = NULL;601}602}603else604{605for (j = -64; j < -16; j += 16)606{607strPre4x4Stage1(p1 + j, 0);608}609}610611strPre4x4Stage1(p1 + 0, 0);612strPre4x4Stage1(p1 + 16, 0);613strPre4x4Stage1(p1 + 32, 0);614strPre4x4Stage1(p1 + 64, 0);615strPre4x4Stage1(p1 + 80, 0);616strPre4x4Stage1(p1 + 96, 0);617strPre4x4Stage1(p1 + 128, 0);618strPre4x4Stage1(p1 + 144, 0);619strPre4x4Stage1(p1 + 160, 0);620}621622if (bottom || bHoriTileBoundary)623{624for (j = ((left || bVertTileBoundary) ? 48 : -16); j < (right ? -16 : 240); j += 64)625{626p = p0 + j;627strPre4(p + 15, p + 14, p + 74, p + 75);628strPre4(p + 13, p + 12, p + 72, p + 73);629p = NULL;630}631}632633if ((right || bVertTileBoundary) && !bottom)634{635if (!top && !bHoriTileBoundary)636{637strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);638strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);639}640for (j = -64; j < -16; j += 16)641{642p = p1 + j;643strPre4(p + 15, p + 13, p + 21, p + 23);644strPre4(p + 14, p + 12, p + 20, p + 22);645p = NULL;646}647}648}649650//================================651// first level transform652if (!top)653{654for (j = (left ? 48 : -16); j < (right ? 48 : 240); j += 64)655{656strDCT4x4Stage1(p0 + j);657}658}659660if (!bottom)661{662for (j = (left ? 0 : -64); j < (right ? 
0 : 192); j += 64)663{664strDCT4x4Stage1(p1 + j + 0);665strDCT4x4Stage1(p1 + j + 16);666strDCT4x4Stage1(p1 + j + 32);667}668}669670//================================671// second level overlap672if (OL_TWO == olOverlap)673{674/* Corner operations */675if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))676strPre4(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);677if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))678strPre4(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);679if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))680strPre4(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);681if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))682strPre4(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);683if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))684{685if (left || bVertTileBoundary) {686j = 0;687strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);688strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);689}690if (right || bVertTileBoundary) {691j = -128;692strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);693strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);694}695}696697if (!leftORright && !bVertTileBoundary)698{699if (topORbottom || bHoriTileBoundary)700{701if (top || bHoriTileBoundary) {702p = p1;703strPre4(p - 128, p - 64, p + 0, p + 64);704strPre4(p - 112, p - 48, p + 16, p + 80);705p = NULL;706}707if (bottom || bHoriTileBoundary) {708p = p0 + 32;709strPre4(p - 128, p - 64, p + 0, p + 64);710strPre4(p - 112, p - 48, p + 16, p + 80);711p = NULL;712}713}714else715{716strPre4x4Stage2Split(p0, p1);717}718}719}720721//================================722// second level transform723if (!topORleft){724if (pSC->m_param.bScaledArith) {725strNormalizeEnc(p0 - 256, (i != 0));726}727strDCT4x4SecondStage(p0 - 256);728}729}730731//================================================================732// 420_UV733for(i = 0; i < (YUV_420 == cfColorFormat? 
2 : 0); ++i)734{735PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);736PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);737738//================================739// first level overlap (420_UV)740if (OL_NONE != olOverlap)741{742/* Corner operations */743if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))744strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);745if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))746strPre4(p1 - 27, p1 - 28, p1 - 25, p1 - 26);747if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))748strPre4(p0 + 16 + 10, p0 + 16 + 11, p0 + 16 + 8, p0 + 16 + 9);749if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))750strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);751if(!right && !bottom)752{753if (top || bHoriTileBoundary)754{755756for (j = ((left || bVertTileBoundary) ? 0 : -32); j < 32; j += 32)757{758p = p1 + j;759strPre4(p + 5, p + 4, p + 32, p + 33);760strPre4(p + 7, p + 6, p + 34, p + 35);761p = NULL;762}763}764else765{766for (j = ((left || bVertTileBoundary) ? 0: -32); j < 32; j += 32)767{768strPre4x4Stage1Split(p0 + 16 + j, p1 + j, 32);769}770}771772if (left || bVertTileBoundary)773{774if (!top && !bHoriTileBoundary)775{776strPre4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);777strPre4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);778}779780strPre4(p1 + 10, p1 + 8, p1 + 16, p1 + 18);781strPre4(p1 + 11, p1 + 9, p1 + 17, p1 + 19);782}783else if (!bVertTileBoundary)784{785strPre4x4Stage1(p1 - 32, 32);786}787788strPre4x4Stage1(p1, 32);789}790791if (bottom || bHoriTileBoundary)792{793for (j = ((left || bVertTileBoundary) ? 16: -16); j < (right ? 
-16: 32); j += 32)794{795p = p0 + j;796strPre4(p + 15, p + 14, p + 42, p + 43);797strPre4(p + 13, p + 12, p + 40, p + 41);798p = NULL;799}800}801802if ((right || bVertTileBoundary) && !bottom)803{804if (!top && !bHoriTileBoundary)805{806strPre4(p0 - 1, p0 - 3, p1 - 27, p1 - 25);807strPre4(p0 - 2, p0 - 4, p1 - 28, p1 - 26);808}809810strPre4(p1 - 17, p1 - 19, p1 - 11, p1 - 9);811strPre4(p1 - 18, p1 - 20, p1 - 12, p1 - 10);812}813}814815//================================816// first level transform (420_UV)817if (!top)818{819for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)820{821strDCT4x4Stage1(p0 + j);822}823}824825if (!bottom)826{827for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)828{829strDCT4x4Stage1(p1 + j);830}831}832833//================================834// second level overlap (420_UV)835if (OL_TWO == olOverlap)836{837if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))838COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));839840if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))841iPredBefore[i][0] = *(p1 + 0);842if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))843COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);844845if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))846COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));847848if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))849iPredBefore[i][1] = *(p0 + 16);850if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))851COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);852853if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)854{855if (left || bVertTileBoundary)856strPre2(p0 + 0 + 16, p1 + 0);857if (right || bVertTileBoundary)858strPre2(p0 + -32 + 16, p1 + -32);859}860861if (!leftORright)862{863if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)864{865if (top || bHoriTileBoundary)866strPre2(p1 - 32, 
p1);867if (bottom || bHoriTileBoundary)868strPre2(p0 + 16 - 32, p0 + 16);869}870else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)871strPre2x2(p0 - 16, p0 + 16, p1 - 32, p1);872}873if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))874COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));875if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))876iPredAfter[i][0] = *(p1 + 0);877if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))878COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);879if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))880COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));881if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))882iPredAfter[i][1] = *(p0 + 16);883if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))884COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);885}886887//================================888// second level transform (420_UV)889if (!topORleft)890{891if (!pSC->m_param.bScaledArith) {892strDCT2x2dn(p0 - 64, p0 - 32, p0 - 48, p0 - 16);893}894else {895strDCT2x2dnEnc(p0 - 64, p0 - 32, p0 - 48, p0 - 16);896}897}898}899900//================================================================901// 422_UV902for(i = 0; i < (YUV_422 == cfColorFormat? 2 : 0); ++i)903{904PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);905PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? 
pSC->pU1 : pSC->pV1);906907//================================908// first level overlap (422_UV)909if (OL_NONE != olOverlap)910{911/* Corner operations */912if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))913strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);914if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))915strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);916if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))917strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);918if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))919strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);920if(!right && !bottom)921{922if (top || bHoriTileBoundary)923{924925for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 64; j += 64)926{927p = p1 + j;928strPre4(p + 5, p + 4, p + 64, p + 65);929strPre4(p + 7, p + 6, p + 66, p + 67);930p = NULL;931}932}933else934{935for (j = ((left || bVertTileBoundary) ? 0: -64); j < 64; j += 64)936{937strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);938}939}940941if (left || bVertTileBoundary)942{943if (!top && !bHoriTileBoundary)944{945strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);946strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);947}948949for (j = 0; j < 48; j += 16)950{951p = p1 + j;952strPre4(p + 10, p + 8, p + 16, p + 18);953strPre4(p + 11, p + 9, p + 17, p + 19);954p = NULL;955}956}957else if (!bVertTileBoundary)958{959for (j = -64; j < -16; j += 16)960{961strPre4x4Stage1(p1 + j, 0);962}963}964965strPre4x4Stage1(p1 + 0, 0);966strPre4x4Stage1(p1 + 16, 0);967strPre4x4Stage1(p1 + 32, 0);968}969970if (bottom || bHoriTileBoundary)971{972for (j = ((left || bVertTileBoundary) ? 48: -16); j < (right ? 
-16: 112); j += 64)973{974p = p0 + j;975strPre4(p + 15, p + 14, p + 74, p + 75);976strPre4(p + 13, p + 12, p + 72, p + 73);977p = NULL;978}979}980981if ((right || bVertTileBoundary) && !bottom)982{983if (!top && !bHoriTileBoundary)984{985strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);986strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);987}988989for (j = -64; j < -16; j += 16)990{991p = p1 + j;992strPre4(p + 15, p + 13, p + 21, p + 23);993strPre4(p + 14, p + 12, p + 20, p + 22);994p = NULL;995}996}997}998999//================================1000// first level transform (422_UV)1001if (!top)1002{1003for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)1004{1005strDCT4x4Stage1(p0 + j);1006}1007}10081009if (!bottom)1010{1011for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)1012{1013strDCT4x4Stage1(p1 + j + 0);1014strDCT4x4Stage1(p1 + j + 16);1015strDCT4x4Stage1(p1 + j + 32);1016}1017}10181019//================================1020// second level overlap (422_UV)1021if (OL_TWO == olOverlap)1022{1023if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))1024COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));10251026if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))1027iPredBefore[i][0] = *(p1 + 0);1028if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))1029COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);10301031if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))1032COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));10331034if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))1035iPredBefore[i][1] = *(p0 + 48);1036if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))1037COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);10381039if (!bottom)1040{1041if (leftORright || bVertTileBoundary)1042{1043if (!top && !bHoriTileBoundary)1044{1045if (left || bVertTileBoundary)1046strPre2(p0 + 48 + 0, p1 + 0);10471048if (right || 
bVertTileBoundary)1049strPre2(p0 + 48 + -64, p1 + -64);1050}10511052if (left || bVertTileBoundary)1053strPre2(p1 + 16, p1 + 16 + 16);10541055if (right || bVertTileBoundary)1056strPre2(p1 + -48, p1 + -48 + 16);1057}10581059if (!leftORright && !bVertTileBoundary)1060{1061if (top || bHoriTileBoundary)1062strPre2(p1 - 64, p1);1063else1064strPre2x2(p0 - 16, p0 + 48, p1 - 64, p1);10651066strPre2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);1067}1068}10691070if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))1071strPre2(p0 - 16, p0 + 48);10721073if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))1074COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));10751076if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))1077iPredAfter[i][0] = *(p1 + 0);1078if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))1079COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);10801081if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))1082COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));10831084if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))1085iPredAfter[i][1] = *(p0 + 48);1086if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))1087COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);1088}10891090//================================1091// second level transform (422_UV)1092if (!topORleft)1093{1094if (!pSC->m_param.bScaledArith) {1095strDCT2x2dn(p0 - 128, p0 - 64, p0 - 112, p0 - 48);1096strDCT2x2dn(p0 - 96, p0 - 32, p0 - 80, p0 - 16);1097}1098else {1099strDCT2x2dnEnc(p0 - 128, p0 - 64, p0 - 112, p0 - 48);1100strDCT2x2dnEnc(p0 - 96, p0 - 32, p0 - 80, p0 - 16);1101}11021103// 1D lossless HT1104p0[- 96] -= p0[-128];1105p0[-128] += ((p0[-96] + 1) >> 1);1106}1107}1108assert(NULL == p);1109}1110111111121113