/*
 * Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/solaris/native/sun/java2d/loops/vis_Interp.c
 * 32288 views
 */
/*1* Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425#include <vis_proto.h>26#include "java2d_Mlib.h"2728/*#define USE_TWO_BC_TABLES*/ /* a little more precise, but slow on Ultra-III */2930/***************************************************************/3132#define MUL_16x16(src1, src2) \33vis_fpadd16(vis_fmul8sux16((src1), (src2)), \34vis_fmul8ulx16((src1), (src2)))3536#define BILINEAR \37xf = vis_fand(xf, mask7fff); \38yf = vis_fand(yf, mask7fff); \39xr = vis_fpsub32(mask7fff, xf); \40yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf)); \41yf1 = vis_fmul8x16au(mask80, vis_read_lo(yf)); \42\43a0 = vis_fmul8x16au(vis_read_hi(a01), vis_read_hi(xr)); \44a1 = vis_fmul8x16au(vis_read_lo(a01), vis_read_hi(xf)); \45a2 = vis_fmul8x16au(vis_read_hi(a23), vis_read_hi(xr)); \46a3 = vis_fmul8x16au(vis_read_lo(a23), vis_read_hi(xf)); \47a0 = vis_fpadd16(a0, 
a1); \48a2 = vis_fpadd16(a2, a3); \49a2 = vis_fpsub16(a2, a0); \50a2 = MUL_16x16(a2, yf0); \51a0 = vis_fmul8x16(mask40, a0); \52a0 = vis_fpadd16(a0, a2); \53a0 = vis_fpadd16(a0, d_rnd); \54\55b0 = vis_fmul8x16au(vis_read_hi(b01), vis_read_lo(xr)); \56b1 = vis_fmul8x16au(vis_read_lo(b01), vis_read_lo(xf)); \57b2 = vis_fmul8x16au(vis_read_hi(b23), vis_read_lo(xr)); \58b3 = vis_fmul8x16au(vis_read_lo(b23), vis_read_lo(xf)); \59b0 = vis_fpadd16(b0, b1); \60b2 = vis_fpadd16(b2, b3); \61b2 = vis_fpsub16(b2, b0); \62b2 = MUL_16x16(b2, yf1); \63b0 = vis_fmul8x16(mask40, b0); \64b0 = vis_fpadd16(b0, b2); \65b0 = vis_fpadd16(b0, d_rnd); \66\67xf = vis_fpadd32(xf, dx); \68yf = vis_fpadd32(yf, dy)6970void71vis_BilinearBlend(jint *pRGB, jint numpix,72jint xfract, jint dxfract,73jint yfract, jint dyfract)74{75mlib_d64 *p_src = (void*)pRGB;76mlib_f32 *p_dst = (void*)pRGB;77mlib_d64 a01, a23, a0, a1, a2, a3;78mlib_d64 b01, b23, b0, b1, b2, b3;79mlib_d64 xf, xr, dx, yf, yf0, yf1, dy;80mlib_d64 mask7fff, d_rnd;81mlib_f32 mask80, mask40;82mlib_s32 i;8384vis_write_gsr(2 << 3);8586xf = vis_to_double(xfract >> 1, (xfract + dxfract) >> 1);87yf = vis_to_double(yfract >> 1, (yfract + dyfract) >> 1);88dx = vis_to_double_dup(dxfract);89dy = vis_to_double_dup(dyfract);9091mask7fff = vis_to_double_dup(0x7fffffff);92d_rnd = vis_to_double_dup(0x00100010);93mask80 = vis_to_float(0x80808080);94mask40 = vis_to_float(0x40404040);9596#pragma pipeloop(0)97for (i = 0; i < numpix/2; i++) {98a01 = p_src[0];99a23 = p_src[1];100b01 = p_src[2];101b23 = p_src[3];102p_src += 4;103104BILINEAR;105106((mlib_d64*)p_dst)[0] = vis_fpack16_pair(a0, b0);107p_dst += 2;108}109110if (numpix & 1) {111a01 = p_src[0];112a23 = p_src[1];113114BILINEAR;115116p_dst[0] = vis_fpack16(a0);117}118}119120/***************************************************************/121122static jboolean vis_bicubic_table_inited = 0;123static mlib_d64 vis_bicubic_coeff[256 + 1];124#ifdef USE_TWO_BC_TABLES125static mlib_d64 vis_bicubic_coeff2[512 
+ 1];126#endif127128/*129* REMIND: The following formulas are designed to give smooth130* results when 'A' is -0.5 or -1.0.131*/132133static void134init_vis_bicubic_table(jdouble A)135{136mlib_s16 *p_tbl = (void*)vis_bicubic_coeff;137#ifdef USE_TWO_BC_TABLES138mlib_s16 *p_tbl2 = (void*)vis_bicubic_coeff2;139#endif140mlib_d64 x, y;141int i;142143for (i = 0; i <= 256; i++) {144x = i*(1.0/256.0);145146/* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */147y = ((A+2)*x - (A+3))*x*x + 1;148y *= 16384;149p_tbl[4*i + 1] = p_tbl[4*(256 - i) + 2] = (mlib_s16)y;150#ifdef USE_TWO_BC_TABLES151y *= 2;152if (y >= 32767) y = 32767;153p_tbl2[4*i] = p_tbl2[4*i + 1] =154p_tbl2[4*i + 2] = p_tbl2[4*i + 3] = (mlib_s16)y;155#endif156157/* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */158x += 1.0;159y = ((A*x - 5*A)*x + 8*A)*x - 4*A;160y *= 16384;161p_tbl[4*i] = p_tbl[4*(256 - i) + 3] = (mlib_s16)y;162#ifdef USE_TWO_BC_TABLES163y *= 2;164if (y >= 32767) y = 32767;165p_tbl2[4*i + 1024] = p_tbl2[4*i + 1025] =166p_tbl2[4*i + 1026] = p_tbl2[4*i + 1027] = (mlib_s16)y;167#endif168}169vis_bicubic_table_inited = 1;170}171172/***************************************************************/173174#define MUL_BC_COEFF(x0, x1, coeff) \175vis_fpadd16(vis_fmul8x16au(x0, coeff), vis_fmul8x16al(x1, coeff))176177#define SAT(val, max) \178do { \179val -= max; /* only overflows are now positive */ \180val &= (val >> 31); /* positives become 0 */ \181val += max; /* range is now [0 -> max] */ \182} while (0)183184void185vis_BicubicBlend(jint *pRGB, jint numpix,186jint xfract, jint dxfract,187jint yfract, jint dyfract)188{189mlib_d64 *p_src = (void*)pRGB;190union {191jint theInt;192mlib_f32 theF32;193} p_dst;194mlib_d64 a0, a1, a2, a3, a4, a5, a6, a7;195mlib_d64 xf, yf, yf0, yf1, yf2, yf3;196mlib_d64 d_rnd;197mlib_f32 mask80;198mlib_s32 i;199200if (!vis_bicubic_table_inited) {201init_vis_bicubic_table(-0.5);202}203204#ifdef USE_TWO_BC_TABLES205vis_write_gsr(2 << 3);206d_rnd = 
vis_to_double_dup(0x000f000f);207#else208vis_write_gsr(4 << 3);209d_rnd = vis_to_double_dup(0x00030003);210#endif211212mask80 = vis_to_float(0x80808080);213214#pragma pipeloop(0)215for (i = 0; i < numpix; i++) {216jint xfactor, yfactor;217218xfactor = URShift(xfract, 32-8);219xfract += dxfract;220xf = vis_bicubic_coeff[xfactor];221222a0 = p_src[0];223a1 = p_src[1];224a2 = p_src[2];225a3 = p_src[3];226a4 = p_src[4];227a5 = p_src[5];228a6 = p_src[6];229a7 = p_src[7];230p_src += 8;231232a0 = MUL_BC_COEFF(vis_read_hi(a0), vis_read_lo(a0), vis_read_hi(xf));233a1 = MUL_BC_COEFF(vis_read_hi(a1), vis_read_lo(a1), vis_read_lo(xf));234a2 = MUL_BC_COEFF(vis_read_hi(a2), vis_read_lo(a2), vis_read_hi(xf));235a3 = MUL_BC_COEFF(vis_read_hi(a3), vis_read_lo(a3), vis_read_lo(xf));236a4 = MUL_BC_COEFF(vis_read_hi(a4), vis_read_lo(a4), vis_read_hi(xf));237a5 = MUL_BC_COEFF(vis_read_hi(a5), vis_read_lo(a5), vis_read_lo(xf));238a6 = MUL_BC_COEFF(vis_read_hi(a6), vis_read_lo(a6), vis_read_hi(xf));239a7 = MUL_BC_COEFF(vis_read_hi(a7), vis_read_lo(a7), vis_read_lo(xf));240241a0 = vis_fpadd16(a0, a1);242a1 = vis_fpadd16(a2, a3);243a2 = vis_fpadd16(a4, a5);244a3 = vis_fpadd16(a6, a7);245246yfactor = URShift(yfract, 32-8);247yfract += dyfract;248#ifdef USE_TWO_BC_TABLES249yf0 = vis_bicubic_coeff2[256 + yfactor];250yf1 = vis_bicubic_coeff2[yfactor];251yf2 = vis_bicubic_coeff2[256 - yfactor];252yf3 = vis_bicubic_coeff2[512 - yfactor];253#else254yf = vis_bicubic_coeff[yfactor];255yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf));256yf1 = vis_fmul8x16al(mask80, vis_read_hi(yf));257yf2 = vis_fmul8x16au(mask80, vis_read_lo(yf));258yf3 = vis_fmul8x16al(mask80, vis_read_lo(yf));259#endif260261a0 = MUL_16x16(a0, yf0);262a1 = MUL_16x16(a1, yf1);263a2 = MUL_16x16(a2, yf2);264a3 = MUL_16x16(a3, yf3);265a0 = vis_fpadd16(a0, d_rnd);266267a0 = vis_fpadd16(vis_fpadd16(a0, a1), vis_fpadd16(a2, a3));268269p_dst.theF32 = vis_fpack16(a0);270{271int a, r, g, b;272b = p_dst.theInt;273a = (b >> 24) & 0xff;274r = (b >> 
16) & 0xff;275g = (b >> 8) & 0xff;276b = (b ) & 0xff;277SAT(r, a);278SAT(g, a);279SAT(b, a);280*pRGB++ = ((a << 24) | (r << 16) | (g << 8) | (b));281}282}283}284285/***************************************************************/286287288