/* Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/solaris/native/sun/java2d/loops/vis_FourByteAbgr.c */
/* 32288 views */
/*1* Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)2627#include "vis_AlphaMacros.h"2829/***************************************************************/3031#define Gray2Argb(x) \320xff000000 | (x << 16) | (x << 8) | x3334/***************************************************************/3536#if VIS >= 0x2003738#define BMASK_FOR_ARGB \39vis_write_bmask(0x03214765, 0);4041#else4243#define BMASK_FOR_ARGB4445#endif4647/***************************************************************/4849#define RGB2ABGR_DB(x) \50x = vis_for(x, amask); \51ARGB2ABGR_DB(x)5253/***************************************************************/5455#define INSERT_U8_34R \56sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1)); \57sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2)); \58sdc = 
vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2)); \59sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb)); \60sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc)); \61sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc)); \62sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde)); \63sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf)); \64sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf)); \65sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi)); \66sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi)); \67sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh)); \68sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh)); \69dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj)); \70dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj)); \71dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk)); \72dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk))7374/***************************************************************/7576void IntArgbToIntAbgrConvert_line(mlib_s32 *srcBase,77mlib_s32 *dstBase,78mlib_s32 width)79{80mlib_s32 *dst_end = dstBase + width;81mlib_d64 dd;82mlib_f32 ff;8384BMASK_FOR_ARGB8586if ((mlib_s32)srcBase & 7) {87ff = *(mlib_f32*)srcBase;88ARGB2ABGR_FL(ff)89*(mlib_f32*)dstBase = ff;90srcBase++;91dstBase++;92}9394if ((mlib_s32)dstBase & 7) {95#pragma pipeloop(0)96for (; dstBase <= (dst_end - 2); dstBase += 2) {97dd = *(mlib_d64*)srcBase;98ARGB2ABGR_DB(dd)99((mlib_f32*)dstBase)[0] = vis_read_hi(dd);100((mlib_f32*)dstBase)[1] = vis_read_lo(dd);101srcBase += 2;102}103} else {104#pragma pipeloop(0)105for (; dstBase <= (dst_end - 2); dstBase += 2) {106dd = *(mlib_d64*)srcBase;107ARGB2ABGR_DB(dd)108*(mlib_d64*)dstBase = dd;109srcBase += 2;110}111}112113if (dstBase < dst_end) {114ff = *(mlib_f32*)srcBase;115ARGB2ABGR_FL(ff)116*(mlib_f32*)dstBase = ff;117}118}119120/***************************************************************/121122void ADD_SUFF(FourByteAbgrToIntArgbConvert)(BLIT_PARAMS)123{124mlib_u32 *argb = (mlib_u32 *)dstBase;125mlib_u8 *pabgr = (mlib_u8 
*)srcBase;126mlib_s32 dstScan = (pDstInfo)->scanStride;127mlib_s32 srcScan = (pSrcInfo)->scanStride;128mlib_s32 i, j, count, left;129mlib_d64 w_abgr;130131if (width < 16) {132for (j = 0; j < height; j++) {133mlib_u8 *src = srcBase;134mlib_s32 *dst = dstBase;135136for (i = 0; i < width; i++) {137*dst++ = (src[0] << 24) | (src[3] << 16) |138(src[2] << 8) | (src[1]);139src += 4;140}141142PTR_ADD(dstBase, dstScan);143PTR_ADD(srcBase, srcScan);144}145return;146}147148if (dstScan == 4*width && srcScan == dstScan) {149width *= height;150height = 1;151}152count = width >> 1;153left = width & 1;154155BMASK_FOR_ARGB156157if ((((mlib_addr)pabgr & 3) == 0) && ((srcScan & 3) == 0)) {158mlib_u32 *abgr = (mlib_u32 *)pabgr;159160dstScan >>= 2;161srcScan >>= 2;162163for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {164if ((((mlib_addr) argb | (mlib_addr) abgr) & 7) == 0) {165mlib_d64 *d_abgr = (mlib_d64 *) abgr;166mlib_d64 *d_argb = (mlib_d64 *) argb;167168#pragma pipeloop(0)169for (j = 0; j < count; j++) {170w_abgr = d_abgr[j];171ARGB2ABGR_DB(w_abgr)172d_argb[j] = w_abgr;173}174175if (left) {176w_abgr = d_abgr[count];177ARGB2ABGR_DB(w_abgr)178((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);179}180} else {181mlib_f32 v_abgr0, v_abgr1;182183#pragma pipeloop(0)184for (j = 0; j < count; j++) {185v_abgr0 = ((mlib_f32 *) abgr)[2 * j];186v_abgr1 = ((mlib_f32 *) abgr)[2 * j + 1];187w_abgr = vis_freg_pair(v_abgr0, v_abgr1);188ARGB2ABGR_DB(w_abgr)189((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);190((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);191}192193if (left) {194v_abgr0 = ((mlib_f32 *) abgr)[2 * count];195w_abgr = vis_freg_pair(v_abgr0, 0);196ARGB2ABGR_DB(w_abgr)197((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);198}199}200}201} else { /* abgr is not aligned */202mlib_u8 *abgr = pabgr;203mlib_d64 *d_abgr, db0, db1;204205dstScan >>= 2;206207for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {208d_abgr = vis_alignaddr(abgr, 0);209db0 = 
*d_abgr++;210211if (((mlib_addr) argb & 7) == 0) {212mlib_d64 *d_argb = (mlib_d64 *) argb;213214#pragma pipeloop(0)215for (j = 0; j < count; j++) {216db1 = d_abgr[j];217w_abgr = vis_faligndata(db0, db1);218db0 = db1;219ARGB2ABGR_DB(w_abgr)220d_argb[j] = w_abgr;221}222223if (left) {224db1 = d_abgr[j];225w_abgr = vis_faligndata(db0, db1);226ARGB2ABGR_DB(w_abgr)227((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);228}229} else {230mlib_d64 w_abgr;231232db1 = *d_abgr++;233w_abgr = vis_faligndata(db0, db1);234db0 = db1;235#pragma pipeloop(0)236for (j = 0; j < count; j++) {237ARGB2ABGR_DB(w_abgr)238((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);239((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);240db1 = d_abgr[j];241w_abgr = vis_faligndata(db0, db1);242db0 = db1;243}244245if (left) {246ARGB2ABGR_DB(w_abgr)247((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);248}249}250}251}252}253254/***************************************************************/255256void ADD_SUFF(IntArgbToFourByteAbgrConvert)(BLIT_PARAMS)257{258mlib_u32 *argb = (mlib_u32 *)srcBase;259mlib_u8 *abgr = (mlib_u8 *)dstBase;260mlib_s32 dstScan = (pDstInfo)->scanStride;261mlib_s32 srcScan = (pSrcInfo)->scanStride;262mlib_s32 i, j, count, left;263mlib_d64 w_abgr;264265if (width < 16) {266for (j = 0; j < height; j++) {267mlib_s32 *src = srcBase;268mlib_u8 *dst = dstBase;269270for (i = 0; i < width; i++) {271mlib_u32 x = *src++;272dst[0] = x >> 24;273dst[1] = x;274dst[2] = x >> 8;275dst[3] = x >> 16;276dst += 4;277}278279PTR_ADD(dstBase, dstScan);280PTR_ADD(srcBase, srcScan);281}282return;283}284285if (dstScan == 4*width && srcScan == dstScan) {286width *= height;287height = 1;288}289count = width >> 1;290left = width & 1;291292BMASK_FOR_ARGB293294srcScan >>= 2;295296for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {297298if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {299mlib_d64 *d_argb = (mlib_d64 *) argb;300mlib_d64 *d_abgr = (mlib_d64 *) abgr;301302#pragma 
pipeloop(0)303for (j = 0; j < count; j++) {304w_abgr = d_argb[j];305ARGB2ABGR_DB(w_abgr)306d_abgr[j] = w_abgr;307}308309if (left) {310w_abgr = d_argb[count];311ARGB2ABGR_DB(w_abgr)312((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);313}314315} else if (((mlib_addr) abgr & 3) == 0) {316mlib_f32 v_argb0, v_argb1;317318#pragma pipeloop(0)319for (j = 0; j < count; j++) {320v_argb0 = ((mlib_f32 *) argb)[2 * j];321v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];322w_abgr = vis_freg_pair(v_argb0, v_argb1);323324ARGB2ABGR_DB(w_abgr)325((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);326((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);327}328329if (left) {330v_argb0 = ((mlib_f32 *) argb)[2 * count];331w_abgr = vis_freg_pair(v_argb0, vis_fzeros());332333ARGB2ABGR_DB(w_abgr)334((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);335}336337} else { /* abgr is not aligned */338339mlib_u8 *pend = abgr + (width << 2) - 1;340mlib_d64 *d_abgr, db0, db1;341mlib_s32 emask, off;342mlib_f32 *f_argb = (mlib_f32 *) argb;343344off = (mlib_addr)abgr & 7;345vis_alignaddr((void *)(8 - off), 0);346d_abgr = (mlib_d64 *) (abgr - off);347348db1 = vis_freg_pair(*f_argb++, *f_argb++);349ARGB2ABGR_DB(db1)350w_abgr = vis_faligndata(db1, db1);351emask = vis_edge8(abgr, pend);352vis_pst_8(w_abgr, d_abgr++, emask);353db0 = db1;354355db1 = vis_freg_pair(f_argb[0], f_argb[1]);356#pragma pipeloop(0)357for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {358ARGB2ABGR_DB(db1)359w_abgr = vis_faligndata(db0, db1);360*d_abgr++ = w_abgr;361db0 = db1;362f_argb += 2;363db1 = vis_freg_pair(f_argb[0], f_argb[1]);364}365366if ((mlib_addr)d_abgr <= (mlib_addr)pend) {367ARGB2ABGR_DB(db1)368w_abgr = vis_faligndata(db0, db1);369emask = vis_edge8(d_abgr, pend);370vis_pst_8(w_abgr, d_abgr, emask);371}372}373}374}375376/***************************************************************/377378void ADD_SUFF(IntRgbToFourByteAbgrConvert)(BLIT_PARAMS)379{380mlib_u32 *argb = (mlib_u32 *)srcBase;381mlib_u8 *abgr = (mlib_u8 
*)dstBase;382mlib_s32 dstScan = (pDstInfo)->scanStride;383mlib_s32 srcScan = (pSrcInfo)->scanStride;384mlib_s32 i, j, count, left;385mlib_d64 w_abgr;386mlib_d64 amask = vis_to_double_dup(0xFF000000);387388if (width < 16) {389for (j = 0; j < height; j++) {390mlib_s32 *src = srcBase;391mlib_u8 *dst = dstBase;392393for (i = 0; i < width; i++) {394mlib_u32 x = *src++;395dst[0] = 0xFF;396dst[1] = x;397dst[2] = x >> 8;398dst[3] = x >> 16;399dst += 4;400}401402PTR_ADD(dstBase, dstScan);403PTR_ADD(srcBase, srcScan);404}405return;406}407408if (dstScan == 4*width && srcScan == dstScan) {409width *= height;410height = 1;411}412count = width >> 1;413left = width & 1;414415BMASK_FOR_ARGB416417srcScan >>= 2;418419for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {420421if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {422mlib_d64 *d_argb = (mlib_d64 *) argb;423mlib_d64 *d_abgr = (mlib_d64 *) abgr;424425#pragma pipeloop(0)426for (j = 0; j < count; j++) {427w_abgr = d_argb[j];428RGB2ABGR_DB(w_abgr)429d_abgr[j] = w_abgr;430}431432if (left) {433w_abgr = d_argb[count];434RGB2ABGR_DB(w_abgr)435((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);436}437438} else if (((mlib_addr) abgr & 3) == 0) {439mlib_f32 v_argb0, v_argb1;440441#pragma pipeloop(0)442for (j = 0; j < count; j++) {443v_argb0 = ((mlib_f32 *) argb)[2 * j];444v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];445w_abgr = vis_freg_pair(v_argb0, v_argb1);446447RGB2ABGR_DB(w_abgr)448((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);449((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);450}451452if (left) {453v_argb0 = ((mlib_f32 *) argb)[2 * count];454w_abgr = vis_freg_pair(v_argb0, vis_fzeros());455456RGB2ABGR_DB(w_abgr)457((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);458}459460} else { /* abgr is not aligned */461462mlib_u8 *pend = abgr + (width << 2) - 1;463mlib_d64 *d_abgr, db0, db1;464mlib_s32 emask, off;465mlib_f32 *f_argb = (mlib_f32 *) argb;466467off = (mlib_addr)abgr & 7;468vis_alignaddr((void *)(8 - 
off), 0);469d_abgr = (mlib_d64 *) (abgr - off);470471db1 = vis_freg_pair(*f_argb++, *f_argb++);472RGB2ABGR_DB(db1)473w_abgr = vis_faligndata(db1, db1);474emask = vis_edge8(abgr, pend);475vis_pst_8(w_abgr, d_abgr++, emask);476db0 = db1;477478db1 = vis_freg_pair(f_argb[0], f_argb[1]);479#pragma pipeloop(0)480for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {481RGB2ABGR_DB(db1)482w_abgr = vis_faligndata(db0, db1);483*d_abgr++ = w_abgr;484db0 = db1;485f_argb += 2;486db1 = vis_freg_pair(f_argb[0], f_argb[1]);487}488489if ((mlib_addr)d_abgr <= (mlib_addr)pend) {490RGB2ABGR_DB(db1)491w_abgr = vis_faligndata(db0, db1);492emask = vis_edge8(d_abgr, pend);493vis_pst_8(w_abgr, d_abgr, emask);494}495}496}497}498499/***************************************************************/500501void ADD_SUFF(ThreeByteBgrToFourByteAbgrConvert)(BLIT_PARAMS)502{503mlib_s32 dstScan = pDstInfo->scanStride;504mlib_s32 srcScan = pSrcInfo->scanStride;505mlib_d64 sd0, sd1, sd2;506mlib_d64 dd0, dd1, dd2, dd3;507mlib_d64 sda, sdb, sdc, sdd;508mlib_d64 sde, sdf, sdg, sdh;509mlib_d64 sdi, sdj, sdk, sdl;510mlib_d64 sdm;511mlib_d64 sFF;512mlib_s32 r, g, b;513mlib_s32 i, j;514515if (width < 16) {516for (j = 0; j < height; j++) {517mlib_u8 *src = srcBase;518mlib_u8 *dst = dstBase;519520#pragma pipeloop(0)521for (i = 0; i < width; i++) {522dst[0] = 0xFF;523dst[1] = src[0];524dst[2] = src[1];525dst[3] = src[2];526src += 3;527dst += 4;528}529530PTR_ADD(dstBase, dstScan);531PTR_ADD(srcBase, srcScan);532}533return;534}535536if (dstScan == 4*width && srcScan == 3*width) {537width *= height;538height = 1;539}540541sFF = vis_fone();542543for (j = 0; j < height; j++) {544mlib_u8 *pSrc = srcBase;545mlib_u8 *pDst = dstBase;546547if (!(((mlib_s32)pSrc | (mlib_s32)pDst) & 7)) {548#pragma pipeloop(0)549for (i = 0; i <= ((mlib_s32)width - 8); i += 8) {550sd0 = ((mlib_d64*)pSrc)[0];551sd1 = ((mlib_d64*)pSrc)[1];552sd2 = ((mlib_d64*)pSrc)[2];553pSrc += 3*8;554INSERT_U8_34R;555((mlib_d64*)pDst)[0] = 
dd0;556((mlib_d64*)pDst)[1] = dd1;557((mlib_d64*)pDst)[2] = dd2;558((mlib_d64*)pDst)[3] = dd3;559pDst += 4*8;560}561562for (; i < width; i++) {563b = pSrc[0];564g = pSrc[1];565r = pSrc[2];566((mlib_u16*)pDst)[0] = 0xff00 | b;567((mlib_u16*)pDst)[1] = (g << 8) | r;568pSrc += 3;569pDst += 4;570}571} else if (!((mlib_s32)pDst & 1)) {572#pragma pipeloop(0)573for (i = 0; i < width; i++) {574b = pSrc[0];575g = pSrc[1];576r = pSrc[2];577((mlib_u16*)pDst)[0] = 0xff00 | b;578((mlib_u16*)pDst)[1] = (g << 8) | r;579pSrc += 3;580pDst += 4;581}582} else {583*pDst++ = 0xff;584#pragma pipeloop(0)585for (i = 0; i < (mlib_s32)width - 1; i++) {586b = pSrc[0];587g = pSrc[1];588r = pSrc[2];589((mlib_u16*)pDst)[0] = (b << 8) | g;590((mlib_u16*)pDst)[1] = (r << 8) | 0xff;591pSrc += 3;592pDst += 4;593}594if (width) {595pDst[0] = pSrc[0];596pDst[1] = pSrc[1];597pDst[2] = pSrc[2];598}599}600601PTR_ADD(dstBase, dstScan);602PTR_ADD(srcBase, srcScan);603}604}605606/***************************************************************/607608#if 1609610#define LOAD_BGR(dd) { \611mlib_u8 *sp = pSrc - 1 + 3*(tmpsxloc >> shift); \612mlib_d64 *ap = (void*)((mlib_addr)sp &~ 7); \613vis_alignaddr(sp, 0); \614dd = vis_faligndata(ap[0], ap[1]); \615tmpsxloc += sxinc; \616}617618#else619620#define LOAD_BGR(dd) { \621mlib_u8 *sp = pSrc + 3*(tmpsxloc >> shift); \622dd = vis_faligndata(vis_ld_u8(sp + 2), dd); \623dd = vis_faligndata(vis_ld_u8(sp + 1), dd); \624dd = vis_faligndata(vis_ld_u8(sp ), dd); \625dd = vis_faligndata(amask, dd); \626tmpsxloc += sxinc; \627}628629#endif630631/***************************************************************/632633void ADD_SUFF(ThreeByteBgrToFourByteAbgrScaleConvert)(SCALE_PARAMS)634{635mlib_s32 dstScan = pDstInfo->scanStride;636mlib_s32 srcScan = pSrcInfo->scanStride;637mlib_d64 d0;638mlib_d64 amask;639mlib_s32 r, g, b;640mlib_s32 i, j;641642if (width < 16 /*|| (((mlib_s32)dstBase | dstScan) & 3)*/) {643for (j = 0; j < height; j++) {644mlib_u8 *pSrc = srcBase;645mlib_u8 
*pDst = dstBase;646mlib_s32 tmpsxloc = sxloc;647648PTR_ADD(pSrc, (syloc >> shift) * srcScan);649650#pragma pipeloop(0)651for (i = 0; i < width; i++) {652mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);653pDst[0] = 0xff;654pDst[1] = pp[0];655pDst[2] = pp[1];656pDst[3] = pp[2];657tmpsxloc += sxinc;658pDst += 4;659}660661PTR_ADD(dstBase, dstScan);662syloc += syinc;663}664return;665}666667vis_alignaddr(NULL, 7);668amask = vis_to_double_dup(0xFF000000);669670for (j = 0; j < height; j++) {671mlib_u8 *pSrc = srcBase;672mlib_u8 *pDst = dstBase;673mlib_s32 tmpsxloc = sxloc;674675PTR_ADD(pSrc, (syloc >> shift) * srcScan);676677if (!((mlib_s32)pDst & 3)) {678#pragma pipeloop(0)679for (i = 0; i < width; i++) {680LOAD_BGR(d0);681((mlib_f32*)pDst)[0] = vis_fors(vis_read_hi(d0),682vis_read_hi(amask));683pDst += 4;684}685} else if (!((mlib_s32)pDst & 1)) {686#pragma pipeloop(0)687for (i = 0; i < width; i++) {688mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);689tmpsxloc += sxinc;690b = pp[0];691g = pp[1];692r = pp[2];693((mlib_u16*)pDst)[2*i ] = 0xff00 | b;694((mlib_u16*)pDst)[2*i + 1] = (g << 8) | r;695}696} else {697*pDst++ = 0xff;698#pragma pipeloop(0)699for (i = 0; i < (mlib_s32)width - 1; i++) {700mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);701tmpsxloc += sxinc;702b = pp[0];703g = pp[1];704r = pp[2];705((mlib_u16*)pDst)[2*i ] = (b << 8) | g;706((mlib_u16*)pDst)[2*i + 1] = (r << 8) | 0xff;707}708if (width) {709mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);710tmpsxloc += sxinc;711pDst[4*i ] = pp[0];712pDst[4*i+1] = pp[1];713pDst[4*i+2] = pp[2];714}715}716717PTR_ADD(dstBase, dstScan);718syloc += syinc;719}720}721722/***************************************************************/723724void ADD_SUFF(ByteGrayToFourByteAbgrConvert)(BLIT_PARAMS)725{726mlib_s32 dstScan = pDstInfo->scanStride;727mlib_s32 srcScan = pSrcInfo->scanStride;728mlib_d64 d0, d1, d2, d3;729mlib_f32 ff, aa = vis_fones();730mlib_s32 i, j, x;731732if (!(((mlib_s32)dstBase | dstScan) & 3)) 
{733ADD_SUFF(ByteGrayToIntArgbConvert)(BLIT_CALL_PARAMS);734return;735}736737if (width < 16) {738for (j = 0; j < height; j++) {739mlib_u8 *src = srcBase;740mlib_u8 *dst = dstBase;741742for (i = 0; i < width; i++) {743x = *src++;744dst[0] = 0xff;745dst[1] = x;746dst[2] = x;747dst[3] = x;748dst += 4;749}750751PTR_ADD(dstBase, dstScan);752PTR_ADD(srcBase, srcScan);753}754return;755}756757if (srcScan == width && dstScan == 4*width) {758width *= height;759height = 1;760}761762for (j = 0; j < height; j++) {763mlib_u8 *src = srcBase;764mlib_u8 *dst = dstBase;765mlib_u8 *dst_end;766767dst_end = dst + 4*width;768769while (((mlib_s32)src & 3) && dst < dst_end) {770x = *src++;771dst[0] = 0xff;772dst[1] = x;773dst[2] = x;774dst[3] = x;775dst += 4;776}777778if (!((mlib_s32)dst & 3)) {779#pragma pipeloop(0)780for (; dst <= (dst_end - 4*4); dst += 4*4) {781ff = *(mlib_f32*)src;782d0 = vis_fpmerge(aa, ff);783d1 = vis_fpmerge(ff, ff);784d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));785d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));786((mlib_f32*)dst)[0] = vis_read_hi(d2);787((mlib_f32*)dst)[1] = vis_read_lo(d2);788((mlib_f32*)dst)[2] = vis_read_hi(d3);789((mlib_f32*)dst)[3] = vis_read_lo(d3);790src += 4;791}792} else {793mlib_d64 *dp;794795dp = vis_alignaddr(dst, 0);796d3 = vis_faligndata(dp[0], dp[0]);797vis_alignaddrl(dst, 0);798799#pragma pipeloop(0)800for (; dst <= (dst_end - 4*4); dst += 4*4) {801ff = *(mlib_f32*)src;802d0 = vis_fpmerge(aa, ff);803d1 = vis_fpmerge(ff, ff);804d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));805*dp++ = vis_faligndata(d3, d2);806d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));807*dp++ = vis_faligndata(d2, d3);808src += 4;809}810811vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));812}813814while (dst < dst_end) {815x = *src++;816dst[0] = 0xff;817dst[1] = x;818dst[2] = x;819dst[3] = x;820dst += 4;821}822823PTR_ADD(dstBase, dstScan);824PTR_ADD(srcBase, 
srcScan);825}826}827828/***************************************************************/829830void ADD_SUFF(IntArgbToFourByteAbgrXorBlit)(BLIT_PARAMS)831{832mlib_s32 dstScan = pDstInfo->scanStride;833mlib_s32 srcScan = pSrcInfo->scanStride;834mlib_u32 xorpixel = pCompInfo->details.xorPixel;835mlib_u32 alphamask = pCompInfo->alphaMask;836mlib_d64 dd, d_xorpixel, d_alphamask, d_zero;837mlib_s32 i, j, x, neg_mask;838839if (width < 16) {840xorpixel = (xorpixel << 24) | (xorpixel >> 8);841alphamask = (alphamask << 24) | (alphamask >> 8);842843for (j = 0; j < height; j++) {844mlib_s32 *src = srcBase;845mlib_u8 *dst = dstBase;846847for (i = 0; i < width; i++) {848x = src[i];849neg_mask = x >> 31;850x = (x ^ xorpixel) & (neg_mask &~ alphamask);851dst[0] ^= x >> 24;852dst[1] ^= x;853dst[2] ^= x >> 8;854dst[3] ^= x >> 16;855dst += 4;856}857858PTR_ADD(dstBase, dstScan);859PTR_ADD(srcBase, srcScan);860}861return;862}863864if (srcScan == 4*width && dstScan == 4*width) {865width *= height;866height = 1;867}868869d_zero = vis_fzero();870d_xorpixel = vis_freg_pair(vis_ldfa_ASI_PL(&xorpixel),871vis_ldfa_ASI_PL(&xorpixel));872d_alphamask = vis_freg_pair(vis_ldfa_ASI_PL(&alphamask),873vis_ldfa_ASI_PL(&alphamask));874875dd = vis_freg_pair(vis_read_hi(d_xorpixel), vis_read_hi(d_alphamask));876ARGB2ABGR_DB(dd)877xorpixel = ((mlib_s32*)&dd)[0];878alphamask = ((mlib_s32*)&dd)[1];879880for (j = 0; j < height; j++) {881mlib_s32 *src = srcBase;882mlib_u8 *dst = dstBase;883mlib_u8 *dst_end;884885dst_end = dst + 4*width;886887if (!((mlib_s32)dst & 7)) {888#pragma pipeloop(0)889for (; dst <= (dst_end - 8); dst += 8) {890dd = vis_freg_pair(((mlib_f32*)src)[0], ((mlib_f32*)src)[1]);891src += 2;892neg_mask = vis_fcmplt32(dd, d_zero);893ARGB2ABGR_DB(dd)894dd = vis_fxor(dd, d_xorpixel);895dd = vis_fandnot(d_alphamask, dd);896dd = vis_fxor(dd, *(mlib_d64*)dst);897vis_pst_32(dd, dst, neg_mask);898}899}900901while (dst < dst_end) {902x = *src++;903neg_mask = x >> 31;904x = (x ^ xorpixel) & (neg_mask &~ 
alphamask);905dst[0] ^= x >> 24;906dst[1] ^= x;907dst[2] ^= x >> 8;908dst[3] ^= x >> 16;909dst += 4;910}911912PTR_ADD(dstBase, dstScan);913PTR_ADD(srcBase, srcScan);914}915}916917/***************************************************************/918919void ADD_SUFF(ByteGrayToFourByteAbgrScaleConvert)(SCALE_PARAMS)920{921mlib_s32 dstScan = pDstInfo->scanStride;922mlib_s32 srcScan = pSrcInfo->scanStride;923mlib_d64 d0, d1, d2, d3, dd;924mlib_f32 ff, aa;925mlib_s32 i, j, x;926927/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {928ADD_SUFF(ByteGrayToIntArgbScaleConvert)(SCALE_CALL_PARAMS);929return;930}*/931932if (width < 16) {933for (j = 0; j < height; j++) {934mlib_u8 *src = srcBase;935mlib_u8 *dst = dstBase;936mlib_s32 tmpsxloc = sxloc;937938PTR_ADD(src, (syloc >> shift) * srcScan);939940for (i = 0; i < width; i++) {941x = src[tmpsxloc >> shift];942tmpsxloc += sxinc;943dst[4*i ] = 0xff;944dst[4*i + 1] = x;945dst[4*i + 2] = x;946dst[4*i + 3] = x;947}948949PTR_ADD(dstBase, dstScan);950syloc += syinc;951}952return;953}954955aa = vis_fones();956957for (j = 0; j < height; j++) {958mlib_u8 *src = srcBase;959mlib_u8 *dst = dstBase;960mlib_u8 *dst_end;961mlib_s32 tmpsxloc = sxloc;962963PTR_ADD(src, (syloc >> shift) * srcScan);964965dst_end = dst + 4*width;966967if (!((mlib_s32)dst & 3)) {968vis_alignaddr(NULL, 7);969#pragma pipeloop(0)970for (; dst <= (dst_end - 4*4); dst += 4*4) {971LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));972LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));973LOAD_NEXT_U8(dd, src + ((tmpsxloc + sxinc) >> shift));974LOAD_NEXT_U8(dd, src + ((tmpsxloc ) >> shift));975tmpsxloc += 4*sxinc;976ff = vis_read_hi(dd);977d0 = vis_fpmerge(aa, ff);978d1 = vis_fpmerge(ff, ff);979d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));980d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));981((mlib_f32*)dst)[0] = vis_read_hi(d2);982((mlib_f32*)dst)[1] = vis_read_lo(d2);983((mlib_f32*)dst)[2] = vis_read_hi(d3);984((mlib_f32*)dst)[3] = vis_read_lo(d3);985}986} 
else {987mlib_d64 *dp;988989dp = vis_alignaddr(dst, 0);990d3 = vis_faligndata(dp[0], dp[0]);991vis_alignaddrl(dst, 0);992993#pragma pipeloop(0)994for (; dst <= (dst_end - 4*4); dst += 4*4) {995mlib_d64 s0, s1, s2, s3;996s0 = vis_ld_u8(src + ((tmpsxloc ) >> shift));997s1 = vis_ld_u8(src + ((tmpsxloc + sxinc) >> shift));998s2 = vis_ld_u8(src + ((tmpsxloc + 2*sxinc) >> shift));999s3 = vis_ld_u8(src + ((tmpsxloc + 3*sxinc) >> shift));1000tmpsxloc += 4*sxinc;1001s0 = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s2));1002s1 = vis_fpmerge(vis_read_lo(s1), vis_read_lo(s3));1003dd = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s1));1004ff = vis_read_lo(dd);1005d0 = vis_fpmerge(aa, ff);1006d1 = vis_fpmerge(ff, ff);1007d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));1008*dp++ = vis_faligndata(d3, d2);1009d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));1010*dp++ = vis_faligndata(d2, d3);1011}10121013vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));1014}10151016while (dst < dst_end) {1017x = src[tmpsxloc >> shift];1018tmpsxloc += sxinc;1019dst[0] = 0xff;1020dst[1] = x;1021dst[2] = x;1022dst[3] = x;1023dst += 4;1024}10251026PTR_ADD(dstBase, dstScan);1027syloc += syinc;1028}1029}10301031/***************************************************************/10321033void ADD_SUFF(ByteIndexedToFourByteAbgrConvert)(BLIT_PARAMS)1034{1035jint *pixLut = pSrcInfo->lutBase;1036mlib_s32 dstScan = pDstInfo->scanStride;1037mlib_s32 srcScan = pSrcInfo->scanStride;1038mlib_d64 dd, d_old;1039mlib_s32 i, j, x;10401041/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {1042ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);1043return;1044}*/10451046if (width < 8) {1047for (j = 0; j < height; j++) {1048mlib_u8 *src = srcBase;1049mlib_u8 *dst = dstBase;10501051for (i = 0; i < width; i++) {1052x = pixLut[src[i]];1053dst[4*i ] = x >> 24;1054dst[4*i + 1] = x;1055dst[4*i + 2] = x >> 8;1056dst[4*i + 3] = x >> 16;1057}10581059PTR_ADD(dstBase, dstScan);1060PTR_ADD(srcBase, 
srcScan);1061}1062return;1063}10641065if (srcScan == width && dstScan == 4*width) {1066width *= height;1067height = 1;1068}10691070BMASK_FOR_ARGB10711072for (j = 0; j < height; j++) {1073mlib_u8 *src = srcBase;1074mlib_u8 *dst = dstBase;1075mlib_u8 *dst_end;10761077dst_end = dst + 4*width;10781079if (!((mlib_s32)dst & 7)) {1080#pragma pipeloop(0)1081for (; dst <= (dst_end - 2*4); dst += 2*4) {1082dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],1083((mlib_f32*)pixLut)[src[1]]);1084ARGB2ABGR_DB(dd)1085*(mlib_d64*)dst = dd;1086src += 2;1087}1088} else {1089mlib_d64 *dp;10901091dp = vis_alignaddr(dst, 0);1092dd = vis_faligndata(dp[0], dp[0]);1093vis_alignaddrl(dst, 0);10941095#pragma pipeloop(0)1096for (; dst <= (dst_end - 2*4); dst += 2*4) {1097d_old = dd;1098dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],1099((mlib_f32*)pixLut)[src[1]]);1100ARGB2ABGR_DB(dd)1101*dp++ = vis_faligndata(d_old, dd);1102src += 2;1103}11041105vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));1106}11071108while (dst < dst_end) {1109x = pixLut[*src++];1110dst[0] = x >> 24;1111dst[1] = x;1112dst[2] = x >> 8;1113dst[3] = x >> 16;1114dst += 4;1115}11161117PTR_ADD(dstBase, dstScan);1118PTR_ADD(srcBase, srcScan);1119}1120}11211122/***************************************************************/11231124void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparOver)(BLIT_PARAMS)1125{1126jint *pixLut = pSrcInfo->lutBase;1127mlib_s32 dstScan = pDstInfo->scanStride;1128mlib_s32 srcScan = pSrcInfo->scanStride;1129mlib_d64 dd, dzero;1130mlib_s32 i, j, x, mask;11311132/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {1133ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);1134return;1135}*/11361137if (width < 8) {1138for (j = 0; j < height; j++) {1139mlib_u8 *src = srcBase;1140mlib_u8 *dst = dstBase;11411142for (i = 0; i < width; i++) {1143x = pixLut[src[i]];1144if (x < 0) {1145dst[4*i ] = x >> 24;1146dst[4*i + 1] = x;1147dst[4*i + 2] = x >> 8;1148dst[4*i + 3] = x >> 16;1149}1150}11511152PTR_ADD(dstBase, 
dstScan);1153PTR_ADD(srcBase, srcScan);1154}1155return;1156}11571158if (srcScan == width && dstScan == 4*width) {1159width *= height;1160height = 1;1161}11621163BMASK_FOR_ARGB11641165dzero = vis_fzero();11661167for (j = 0; j < height; j++) {1168mlib_u8 *src = srcBase;1169mlib_u8 *dst = dstBase;1170mlib_u8 *dst_end;11711172dst_end = dst + 4*width;11731174if (!((mlib_s32)dst & 7)) {1175#pragma pipeloop(0)1176for (; dst <= (dst_end - 2*4); dst += 2*4) {1177dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],1178((mlib_f32*)pixLut)[src[1]]);1179mask = vis_fcmplt32(dd, dzero);1180ARGB2ABGR_DB(dd)1181vis_pst_32(dd, dst, mask);1182src += 2;1183}1184}11851186while (dst < dst_end) {1187x = pixLut[*src++];1188if (x < 0) {1189dst[0] = x >> 24;1190dst[1] = x;1191dst[2] = x >> 8;1192dst[3] = x >> 16;1193}1194dst += 4;1195}11961197PTR_ADD(dstBase, dstScan);1198PTR_ADD(srcBase, srcScan);1199}1200}12011202/***************************************************************/12031204void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparBgCopy)(BCOPY_PARAMS)1205{1206jint *pixLut = pSrcInfo->lutBase;1207mlib_s32 dstScan = pDstInfo->scanStride;1208mlib_s32 srcScan = pSrcInfo->scanStride;1209mlib_d64 dd, dzero, d_bgpixel;1210mlib_s32 i, j, x, mask;1211mlib_s32 bgpix0 = bgpixel;1212mlib_s32 bgpix1 = bgpixel >> 8;1213mlib_s32 bgpix2 = bgpixel >> 16;1214mlib_s32 bgpix3 = bgpixel >> 24;12151216if (width < 8) {1217for (j = 0; j < height; j++) {1218mlib_u8 *src = srcBase;1219mlib_u8 *dst = dstBase;12201221for (i = 0; i < width; i++) {1222x = pixLut[src[i]];1223if (x < 0) {1224dst[4*i ] = x >> 24;1225dst[4*i + 1] = x;1226dst[4*i + 2] = x >> 8;1227dst[4*i + 3] = x >> 16;1228} else {1229dst[4*i ] = bgpix0;1230dst[4*i + 1] = bgpix1;1231dst[4*i + 2] = bgpix2;1232dst[4*i + 3] = bgpix3;1233}1234}12351236PTR_ADD(dstBase, dstScan);1237PTR_ADD(srcBase, srcScan);1238}1239return;1240}12411242if (srcScan == width && dstScan == 4*width) {1243width *= height;1244height = 1;1245}12461247BMASK_FOR_ARGB12481249dzero = 
vis_fzero();1250d_bgpixel = vis_freg_pair(vis_ldfa_ASI_PL(&bgpixel),1251vis_ldfa_ASI_PL(&bgpixel));12521253for (j = 0; j < height; j++) {1254mlib_u8 *src = srcBase;1255mlib_u8 *dst = dstBase;1256mlib_u8 *dst_end;12571258dst_end = dst + 4*width;12591260if (!((mlib_s32)dst & 7)) {1261#pragma pipeloop(0)1262for (; dst <= (dst_end - 2*4); dst += 2*4) {1263dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],1264((mlib_f32*)pixLut)[src[1]]);1265mask = vis_fcmplt32(dd, dzero);1266ARGB2ABGR_DB(dd)1267*(mlib_d64*)dst = d_bgpixel;1268vis_pst_32(dd, dst, mask);1269src += 2;1270}1271}12721273while (dst < dst_end) {1274x = pixLut[*src++];1275if (x < 0) {1276dst[0] = x >> 24;1277dst[1] = x;1278dst[2] = x >> 8;1279dst[3] = x >> 16;1280} else {1281dst[0] = bgpix0;1282dst[1] = bgpix1;1283dst[2] = bgpix2;1284dst[3] = bgpix3;1285}1286dst += 4;1287}12881289PTR_ADD(dstBase, dstScan);1290PTR_ADD(srcBase, srcScan);1291}1292}12931294/***************************************************************/12951296void ADD_SUFF(ByteIndexedToFourByteAbgrScaleConvert)(SCALE_PARAMS)1297{1298jint *pixLut = pSrcInfo->lutBase;1299mlib_s32 dstScan = pDstInfo->scanStride;1300mlib_s32 srcScan = pSrcInfo->scanStride;1301mlib_d64 dd, d_old;1302mlib_s32 i, j, x;13031304/*1305if (!(((mlib_s32)dstBase | dstScan) & 3)) {1306ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);1307return;1308}1309*/13101311if (width < 8) {1312for (j = 0; j < height; j++) {1313mlib_u8 *src = srcBase;1314mlib_u8 *dst = dstBase;1315mlib_s32 tmpsxloc = sxloc;13161317PTR_ADD(src, (syloc >> shift) * srcScan);13181319for (i = 0; i < width; i++) {1320x = pixLut[src[tmpsxloc >> shift]];1321tmpsxloc += sxinc;1322dst[4*i ] = x >> 24;1323dst[4*i + 1] = x;1324dst[4*i + 2] = x >> 8;1325dst[4*i + 3] = x >> 16;1326}13271328PTR_ADD(dstBase, dstScan);1329syloc += syinc;1330}1331return;1332}13331334BMASK_FOR_ARGB13351336for (j = 0; j < height; j++) {1337mlib_u8 *src = srcBase;1338mlib_u8 *dst = dstBase;1339mlib_u8 *dst_end;1340mlib_s32 tmpsxloc 
= sxloc;13411342PTR_ADD(src, (syloc >> shift) * srcScan);13431344dst_end = dst + 4*width;13451346if (!((mlib_s32)dst & 7)) {1347#pragma pipeloop(0)1348for (; dst <= (dst_end - 2*4); dst += 2*4) {1349dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],1350src[(tmpsxloc + sxinc) >> shift]);1351tmpsxloc += 2*sxinc;1352ARGB2ABGR_DB(dd)1353*(mlib_d64*)dst = dd;1354}1355} else {1356mlib_d64 *dp;13571358dp = vis_alignaddr(dst, 0);1359dd = vis_faligndata(dp[0], dp[0]);1360vis_alignaddrl(dst, 0);13611362#pragma pipeloop(0)1363for (; dst <= (dst_end - 2*4); dst += 2*4) {1364d_old = dd;1365dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],1366src[(tmpsxloc + sxinc) >> shift]);1367tmpsxloc += 2*sxinc;1368ARGB2ABGR_DB(dd)1369*dp++ = vis_faligndata(d_old, dd);1370}13711372vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));1373}13741375while (dst < dst_end) {1376x = pixLut[src[tmpsxloc >> shift]];1377tmpsxloc += sxinc;1378dst[0] = x >> 24;1379dst[1] = x;1380dst[2] = x >> 8;1381dst[3] = x >> 16;1382dst += 4;1383}13841385PTR_ADD(dstBase, dstScan);1386syloc += syinc;1387}1388}13891390/***************************************************************/13911392void ADD_SUFF(ByteIndexedBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)1393{1394jint *pixLut = pSrcInfo->lutBase;1395mlib_s32 dstScan = pDstInfo->scanStride;1396mlib_s32 srcScan = pSrcInfo->scanStride;1397mlib_d64 dd, dzero;1398mlib_s32 i, j, x, mask;13991400/*1401if (!(((mlib_s32)dstBase | dstScan) & 3)) {1402ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);1403return;1404}1405*/14061407if (width < 8) {1408for (j = 0; j < height; j++) {1409mlib_u8 *src = srcBase;1410mlib_u8 *dst = dstBase;1411mlib_s32 tmpsxloc = sxloc;14121413PTR_ADD(src, (syloc >> shift) * srcScan);14141415for (i = 0; i < width; i++) {1416x = pixLut[src[tmpsxloc >> shift]];1417tmpsxloc += sxinc;1418if (x < 0) {1419dst[4*i ] = x >> 24;1420dst[4*i + 1] = x;1421dst[4*i + 2] = x >> 8;1422dst[4*i + 3] = x >> 16;1423}1424}14251426PTR_ADD(dstBase, 
dstScan);1427syloc += syinc;1428}1429return;1430}14311432BMASK_FOR_ARGB14331434dzero = vis_fzero();14351436for (j = 0; j < height; j++) {1437mlib_u8 *src = srcBase;1438mlib_u8 *dst = dstBase;1439mlib_u8 *dst_end;1440mlib_s32 tmpsxloc = sxloc;14411442PTR_ADD(src, (syloc >> shift) * srcScan);14431444dst_end = dst + 4*width;14451446if (!((mlib_s32)dst & 7)) {1447#pragma pipeloop(0)1448for (; dst <= (dst_end - 2*4); dst += 2*4) {1449dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],1450src[(tmpsxloc + sxinc) >> shift]);1451tmpsxloc += 2*sxinc;1452mask = vis_fcmplt32(dd, dzero);1453ARGB2ABGR_DB(dd)1454vis_pst_32(dd, dst, mask);1455}1456}14571458while (dst < dst_end) {1459x = pixLut[src[tmpsxloc >> shift]];1460tmpsxloc += sxinc;1461if (x < 0) {1462dst[0] = x >> 24;1463dst[1] = x;1464dst[2] = x >> 8;1465dst[3] = x >> 16;1466}1467dst += 4;1468}14691470PTR_ADD(dstBase, dstScan);1471syloc += syinc;1472}1473}14741475/***************************************************************/14761477void ADD_SUFF(IntArgbBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)1478{1479mlib_s32 dstScan = pDstInfo->scanStride;1480mlib_s32 srcScan = pSrcInfo->scanStride;1481mlib_d64 dd, amask;1482mlib_s32 i, j, x, mask;14831484if (width < 16) {1485for (j = 0; j < height; j++) {1486mlib_s32 *src = srcBase;1487mlib_u8 *dst = dstBase;1488mlib_s32 tmpsxloc = sxloc;14891490PTR_ADD(src, (syloc >> shift) * srcScan);14911492for (i = 0; i < width; i++) {1493x = src[tmpsxloc >> shift];1494tmpsxloc += sxinc;1495if (x >> 24) {1496dst[4*i ] = 0xFF;1497dst[4*i + 1] = x;1498dst[4*i + 2] = x >> 8;1499dst[4*i + 3] = x >> 16;1500}1501}15021503PTR_ADD(dstBase, dstScan);1504syloc += syinc;1505}1506return;1507}15081509BMASK_FOR_ARGB15101511amask = vis_to_double_dup(0xFF000000);15121513for (j = 0; j < height; j++) {1514mlib_s32 *src = srcBase;1515mlib_u8 *dst = dstBase;1516mlib_u8 *dst_end;1517mlib_s32 tmpsxloc = sxloc;15181519PTR_ADD(src, (syloc >> shift) * srcScan);15201521dst_end = dst + 4*width;15221523if (!((mlib_s32)dst & 
7)) {1524#pragma pipeloop(0)1525for (; dst <= (dst_end - 2*4); dst += 2*4) {1526mlib_s32 *pp0 = src + (tmpsxloc >> shift);1527mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);1528dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);1529tmpsxloc += 2*sxinc;1530ARGB2ABGR_DB(dd)1531dd = vis_for(dd, amask);1532mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |1533(((-*(mlib_u8*)pp1) >> 31) & 1);1534vis_pst_32(dd, dst, mask);1535}1536}15371538while (dst < dst_end) {1539x = src[tmpsxloc >> shift];1540tmpsxloc += sxinc;1541if (x >> 24) {1542dst[0] = 0xFF;1543dst[1] = x;1544dst[2] = x >> 8;1545dst[3] = x >> 16;1546}1547dst += 4;1548}15491550PTR_ADD(dstBase, dstScan);1551syloc += syinc;1552}1553}15541555/***************************************************************/15561557#ifdef MLIB_ADD_SUFF1558#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver_F = \1559IntArgbBmToFourByteAbgrScaleXparOver_F1560#else1561#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver = \1562IntArgbBmToFourByteAbgrScaleXparOver1563#endif15641565/***************************************************************/15661567void ADD_SUFF(FourByteAbgrToIntArgbScaleConvert)(SCALE_PARAMS)1568{1569mlib_s32 dstScan = pDstInfo->scanStride;1570mlib_s32 srcScan = pSrcInfo->scanStride;1571mlib_s32 i, j;15721573if (width < 16) {1574for (j = 0; j < height; j++) {1575mlib_u8 *src = srcBase;1576mlib_s32 *dst = dstBase;1577mlib_s32 tmpsxloc = sxloc;15781579PTR_ADD(src, (syloc >> shift) * srcScan);15801581for (i = 0; i < width; i++) {1582mlib_u8 *pp = src + 4*(tmpsxloc >> shift);1583*dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];1584tmpsxloc += sxinc;1585}15861587PTR_ADD(dstBase, dstScan);1588syloc += syinc;1589}1590return;1591}15921593BMASK_FOR_ARGB15941595for (j = 0; j < height; j++) {1596mlib_u8 *src = srcBase;1597mlib_s32 *dst = dstBase;1598mlib_s32 *dst_end = dst + width;1599mlib_s32 tmpsxloc = sxloc;1600mlib_s32 off;1601mlib_d64 dd, dd0, dd1;1602mlib_f32 *pp0, *pp1;16031604PTR_ADD(src, (syloc >> shift) * 
srcScan);16051606if ((mlib_s32)dst & 7) {1607mlib_u8 *pp = src + 4*(tmpsxloc >> shift);1608*dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];1609tmpsxloc += sxinc;1610}16111612off = (mlib_s32)src & 3;1613if (!off) {1614#pragma pipeloop(0)1615for (; dst <= (dst_end - 2); dst += 2) {1616pp0 = (mlib_f32*)src + (tmpsxloc >> shift);1617pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);1618tmpsxloc += 2*sxinc;1619dd = vis_freg_pair(pp0[0], pp1[0]);1620ARGB2ABGR_DB(dd)1621*(mlib_d64*)dst = dd;1622}1623} else {1624vis_alignaddr(NULL, off);1625#pragma pipeloop(0)1626for (; dst <= (dst_end - 2); dst += 2) {1627pp0 = (mlib_f32*)(src - off) + (tmpsxloc >> shift);1628pp1 = (mlib_f32*)(src - off) + ((tmpsxloc + sxinc) >> shift);1629tmpsxloc += 2*sxinc;1630dd0 = vis_freg_pair(pp0[0], pp0[1]);1631dd1 = vis_freg_pair(pp1[0], pp1[1]);1632dd0 = vis_faligndata(dd0, dd0);1633dd1 = vis_faligndata(dd1, dd1);1634ARGB2ABGR_FL2(dd, vis_read_hi(dd0), vis_read_hi(dd1))1635*(mlib_d64*)dst = dd;1636}1637}16381639if (dst < dst_end) {1640mlib_u8 *pp = src + 4*(tmpsxloc >> shift);1641*dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];1642tmpsxloc += sxinc;1643}16441645PTR_ADD(dstBase, dstScan);1646syloc += syinc;1647}1648}16491650/***************************************************************/16511652void ADD_SUFF(IntArgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)1653{1654mlib_s32 dstScan = pDstInfo->scanStride;1655mlib_s32 srcScan = pSrcInfo->scanStride;1656mlib_s32 i, j;1657mlib_s32 x;16581659if (width < 16) {1660for (j = 0; j < height; j++) {1661mlib_s32 *src = srcBase;1662mlib_u8 *dst = dstBase;1663mlib_s32 tmpsxloc = sxloc;16641665PTR_ADD(src, (syloc >> shift) * srcScan);16661667for (i = 0; i < width; i++) {1668x = src[tmpsxloc >> shift];1669tmpsxloc += sxinc;1670dst[4*i ] = x >> 24;1671dst[4*i + 1] = x;1672dst[4*i + 2] = x >> 8;1673dst[4*i + 3] = x >> 16;1674}16751676PTR_ADD(dstBase, dstScan);1677syloc += syinc;1678}1679return;1680}16811682BMASK_FOR_ARGB16831684for 
(j = 0; j < height; j++) {1685mlib_s32 *src = srcBase;1686mlib_u8 *dst = dstBase;1687mlib_u8 *dst_end = dst + 4*width;1688mlib_s32 tmpsxloc = sxloc;1689mlib_d64 dd, d_old;1690mlib_f32 *pp0, *pp1;16911692PTR_ADD(src, (syloc >> shift) * srcScan);16931694if (!((mlib_s32)dst & 3)) {1695if ((mlib_s32)dst & 7) {1696x = src[tmpsxloc >> shift];1697tmpsxloc += sxinc;1698dst[0] = x >> 24;1699dst[1] = x;1700dst[2] = x >> 8;1701dst[3] = x >> 16;1702dst += 4;1703}1704#pragma pipeloop(0)1705for (; dst <= (dst_end - 2*4); dst += 2*4) {1706pp0 = (mlib_f32*)src + (tmpsxloc >> shift);1707pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);1708tmpsxloc += 2*sxinc;1709dd = vis_freg_pair(pp0[0], pp1[0]);1710ARGB2ABGR_DB(dd)1711*(mlib_d64*)dst = dd;1712}1713} else {1714mlib_d64 *dp;17151716dp = vis_alignaddr(dst, 0);1717dd = vis_faligndata(dp[0], dp[0]);1718vis_alignaddrl(dst, 0);17191720#pragma pipeloop(0)1721for (; dst <= (dst_end - 2*4); dst += 2*4) {1722d_old = dd;1723pp0 = (mlib_f32*)src + (tmpsxloc >> shift);1724pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);1725tmpsxloc += 2*sxinc;1726dd = vis_freg_pair(pp0[0], pp1[0]);1727ARGB2ABGR_DB(dd)1728*dp++ = vis_faligndata(d_old, dd);1729}17301731vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));1732}17331734if (dst < dst_end) {1735x = src[tmpsxloc >> shift];1736tmpsxloc += sxinc;1737dst[0] = x >> 24;1738dst[1] = x;1739dst[2] = x >> 8;1740dst[3] = x >> 16;1741dst += 4;1742}17431744PTR_ADD(dstBase, dstScan);1745syloc += syinc;1746}1747}17481749/***************************************************************/17501751void ADD_SUFF(IntRgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)1752{1753mlib_s32 dstScan = pDstInfo->scanStride;1754mlib_s32 srcScan = pSrcInfo->scanStride;1755mlib_s32 i, j;1756mlib_s32 x;1757mlib_d64 amask = vis_to_double_dup(0xFF000000);17581759if (width < 16) {1760for (j = 0; j < height; j++) {1761mlib_s32 *src = srcBase;1762mlib_u8 *dst = dstBase;1763mlib_s32 tmpsxloc = sxloc;17641765PTR_ADD(src, (syloc >> 
shift) * srcScan);17661767for (i = 0; i < width; i++) {1768x = src[tmpsxloc >> shift];1769tmpsxloc += sxinc;1770dst[4*i ] = 0xFF;1771dst[4*i + 1] = x;1772dst[4*i + 2] = x >> 8;1773dst[4*i + 3] = x >> 16;1774}17751776PTR_ADD(dstBase, dstScan);1777syloc += syinc;1778}1779return;1780}17811782BMASK_FOR_ARGB17831784for (j = 0; j < height; j++) {1785mlib_s32 *src = srcBase;1786mlib_u8 *dst = dstBase;1787mlib_u8 *dst_end = dst + 4*width;1788mlib_s32 tmpsxloc = sxloc;1789mlib_d64 dd, d_old;1790mlib_f32 *pp0, *pp1;17911792PTR_ADD(src, (syloc >> shift) * srcScan);17931794if (!((mlib_s32)dst & 3)) {1795if ((mlib_s32)dst & 7) {1796x = src[tmpsxloc >> shift];1797tmpsxloc += sxinc;1798dst[0] = 0xFF;1799dst[1] = x;1800dst[2] = x >> 8;1801dst[3] = x >> 16;1802dst += 4;1803}1804#pragma pipeloop(0)1805for (; dst <= (dst_end - 2*4); dst += 2*4) {1806pp0 = (mlib_f32*)src + (tmpsxloc >> shift);1807pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);1808tmpsxloc += 2*sxinc;1809dd = vis_freg_pair(pp0[0], pp1[0]);1810RGB2ABGR_DB(dd)1811*(mlib_d64*)dst = dd;1812}1813} else {1814mlib_d64 *dp;18151816dp = vis_alignaddr(dst, 0);1817dd = vis_faligndata(dp[0], dp[0]);1818vis_alignaddrl(dst, 0);18191820#pragma pipeloop(0)1821for (; dst <= (dst_end - 2*4); dst += 2*4) {1822d_old = dd;1823pp0 = (mlib_f32*)src + (tmpsxloc >> shift);1824pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);1825tmpsxloc += 2*sxinc;1826dd = vis_freg_pair(pp0[0], pp1[0]);1827RGB2ABGR_DB(dd)1828*dp++ = vis_faligndata(d_old, dd);1829}18301831vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));1832}18331834if (dst < dst_end) {1835x = src[tmpsxloc >> shift];1836tmpsxloc += sxinc;1837dst[0] = 0xFF;1838dst[1] = x;1839dst[2] = x >> 8;1840dst[3] = x >> 16;1841dst += 4;1842}18431844PTR_ADD(dstBase, dstScan);1845syloc += syinc;1846}1847}18481849/***************************************************************/18501851void ADD_SUFF(FourByteAbgrDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,1852ImageRef *glyphs,1853jint 
totalGlyphs,1854jint fgpixel, jint argbcolor,1855jint clipLeft, jint clipTop,1856jint clipRight, jint clipBottom,1857NativePrimitive * pPrim,1858CompositeInfo * pCompInfo)1859{1860mlib_d64 buff[BUFF_SIZE/2];1861void *pbuff = buff;1862mlib_s32 glyphCounter;1863mlib_s32 scan = pRasInfo->scanStride;1864mlib_u8 *dstBase;1865mlib_s32 i, j;1866mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;1867mlib_d64 done, done16, d_half;1868mlib_s32 pix, mask;1869mlib_f32 fgpixel_f, srcG_f;1870mlib_s32 max_width = BUFF_SIZE;18711872done = vis_to_double_dup(0x7fff7fff);1873done16 = vis_to_double_dup(0x7fff);1874d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));18751876fgpixel_f = vis_ldfa_ASI_PL(&fgpixel);1877fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f);1878srcG_f = vis_to_float(argbcolor);1879ARGB2ABGR_FL(srcG_f)18801881vis_write_gsr(0 << 3);18821883for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {1884const jubyte *pixels;1885unsigned int rowBytes;1886int left, top;1887int width, height;1888int right, bottom;18891890pixels = (const jubyte *) glyphs[glyphCounter].pixels;18911892if (!pixels) continue;18931894left = glyphs[glyphCounter].x;1895top = glyphs[glyphCounter].y;1896width = glyphs[glyphCounter].width;1897height = glyphs[glyphCounter].height;1898rowBytes = width;1899right = left + width;1900bottom = top + height;1901if (left < clipLeft) {1902pixels += clipLeft - left;1903left = clipLeft;1904}1905if (top < clipTop) {1906pixels += (clipTop - top) * rowBytes;1907top = clipTop;1908}1909if (right > clipRight) {1910right = clipRight;1911}1912if (bottom > clipBottom) {1913bottom = clipBottom;1914}1915if (right <= left || bottom <= top) {1916continue;1917}1918width = right - left;1919height = bottom - top;19201921dstBase = pRasInfo->rasBase;1922PTR_ADD(dstBase, top*scan + 4*left);19231924if (((mlib_s32)dstBase | scan) & 3) {1925if (width > max_width) {1926if (pbuff != buff) {1927mlib_free(pbuff);1928}1929pbuff = mlib_malloc(width*sizeof(mlib_s32));1930if 
(pbuff == NULL) return;1931max_width = width;1932}1933}19341935for (j = 0; j < height; j++) {1936mlib_u8 *src = (void*)pixels;1937mlib_s32 *dst, *dst_end;1938mlib_u8 *dst_start;19391940if ((mlib_s32)dstBase & 3) {1941COPY_NA(dstBase, pbuff, width*sizeof(mlib_s32));1942dst = pbuff;1943} else {1944dst = (void*)dstBase;1945}1946dst_start = (void*)dst;1947dst_end = dst + width;19481949/* Need to reset the GSR from the values set by the1950* convert call near the end of this loop.1951*/1952vis_write_gsr(7 << 0);19531954if ((mlib_s32)dst & 7) {1955pix = *src++;1956dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);1957dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);1958*(mlib_f32*)dst = vis_fpack16(dd);1959if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);1960dst++;1961}19621963#pragma pipeloop(0)1964for (; dst <= (dst_end - 2); dst += 2) {1965dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],1966((mlib_f32 *)vis_mul8s_tbl)[src[1]]);1967mask = vis_fcmplt32(dmix0, done16);1968dmix1 = vis_fpsub16(done, dmix0);1969src += 2;19701971dd = *(mlib_d64*)dst;1972d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));1973d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));1974e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));1975e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));1976d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);1977d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);1978dd = vis_fpack16_pair(d0, d1);19791980*(mlib_d64*)dst = fgpixel_d;1981vis_pst_32(dd, dst, mask);1982}19831984while (dst < dst_end) {1985pix = *src++;1986dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);1987dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);1988*(mlib_f32*)dst = vis_fpack16(dd);1989if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);1990dst++;1991}19921993ADD_SUFF(IntArgbPreToIntArgbConvert)(dst_start, dst_start,1994width, 1,1995pRasInfo, pRasInfo,1996pPrim, pCompInfo);19971998if ((mlib_s32)dstBase & 3) {1999COPY_NA(dst_start, dstBase, 
width*sizeof(mlib_s32));2000}20012002PTR_ADD(dstBase, scan);2003pixels += rowBytes;2004}2005}20062007if (pbuff != buff) {2008mlib_free(pbuff);2009}2010}20112012/***************************************************************/20132014#endif /* JAVA2D_NO_MLIB */201520162017