/* Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/windows/native/sun/java2d/d3d/D3DShaderGen.c */
/* 32288 views */
/*1* Copyright (c) 2007, 2008, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/**26* This file contains a standalone program that is used to generate the27* D3DShaders.h file. The program invokes the fxc (D3D Shader Compiler)28* utility, which is part of the DirectX 9/10 SDK. Since most JDK29* developers (other than some Java 2D engineers) do not have the full DXSDK30* installed, and since we do not want to make the JDK build process31* dependent on the full DXSDK installation, we have chosen not to make32* this shader compilation step part of the build process. Instead, it is33* only necessary to compile and run this program when changes need to be34* made to the shader code contained within. Typically, this only happens35* on an as-needed basis by someone familiar with the D3D pipeline. 
Running36* this program is fairly straightforward:37*38* % rm D3DShaders.h39* % cl D3DShaderGen.c40* % D3DShaderGen.exe41*42* (And don't forget to putback the updated D3DShaders.h file!)43*/4445#include <stdio.h>46#include <process.h>47#include <Windows.h>4849static FILE *fpHeader = NULL;50static char *strHeaderFile = "D3DShaders.h";5152/** Evaluates to true if the given bit is set on the local flags variable. */53#define IS_SET(flagbit) \54(((flags) & (flagbit)) != 0)5556// REMIND57//#define J2dTraceLn(a, b) fprintf(stderr, "%s\n", b);58//#define J2dTraceLn1(a, b, c) fprintf(stderr, b, c);59#define J2dTraceLn(a, b)60#define J2dTraceLn1(a, b, c)6162/************************* General shader support ***************************/6364static void65D3DShaderGen_WriteShader(char *source, char *target, char *name, int flags)66{67FILE *fpTmp;68char varname[50];69char *args[8];70int val;7172// write source to tmp.hlsl73fpTmp = fopen("tmp.hlsl", "w");74fprintf(fpTmp, "%s\n", source);75fclose(fpTmp);7677{78PROCESS_INFORMATION pi;79STARTUPINFO si;80char pargs[300];81sprintf(pargs,82"c:\\progra~1\\mi5889~1\\utilit~1\\bin\\x86\\fxc.exe "83"/T %s /Vn %s%d /Fh tmp.h tmp.hlsl",84// uncomment the following line to generate debug85// info in the shader header file (may be useful86// for testing/debuggging purposes, but it nearly87// doubles the size of the header file and compiled88// shader programs - off for production builds)89//"/Zi /T %s /Vn %s%d /Fh tmp.h tmp.hlsl",90target, name, flags);91fprintf(stderr, "%s\n", pargs);92memset(&si, 0, sizeof(si));93si.cb = sizeof(si);94si.dwFlags = STARTF_USESTDHANDLES;95//si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE);96//fprintf(stderr, "%s\n", pargs);97val = CreateProcess(0, pargs, 0, 0, TRUE,98CREATE_NO_WINDOW, NULL, NULL, &si, &pi);99100{101DWORD code;102do {103GetExitCodeProcess(pi.hProcess, &code);104//fprintf(stderr, "waiting...");105Sleep(100);106} while (code == STILL_ACTIVE);107108if (code != 0) {109fprintf(stderr, "fxc failed for 
%s%d\n", name, flags);110}111}112113CloseHandle(pi.hThread);114CloseHandle(pi.hProcess);115}116117// append tmp.h to D3DShaders.h118{119int ch;120fpTmp = fopen("tmp.h", "r");121while ((ch = fgetc(fpTmp)) != EOF) {122fputc(ch, fpHeader);123}124fclose(fpTmp);125}126}127128static void129D3DShaderGen_WritePixelShader(char *source, char *name, int flags)130{131D3DShaderGen_WriteShader(source, "ps_2_0", name, flags);132}133134#define MULTI_GRAD_CYCLE_METHOD (3 << 0)135/** Extracts the CycleMethod enum value from the given flags variable. */136#define EXTRACT_CYCLE_METHOD(flags) \137((flags) & MULTI_GRAD_CYCLE_METHOD)138139static void140D3DShaderGen_WriteShaderArray(char *name, int num)141{142char array[5000];143char elem[30];144int i;145146sprintf(array, "const DWORD *%sShaders[] =\n{\n", name);147for (i = 0; i < num; i++) {148if (num == 32 && EXTRACT_CYCLE_METHOD(i) == 3) {149// REMIND: what a hack!150sprintf(elem, " NULL,\n");151} else {152sprintf(elem, " %s%d,\n", name, i);153}154strcat(array, elem);155}156strcat(array, "};\n");157158// append to D3DShaders.h159fprintf(fpHeader, "%s\n", array);160}161162/**************************** ConvolveOp support ****************************/163164static const char *convolveShaderSource =165// image to be convolved166"sampler2D baseImage : register(s0);"167// image edge limits:168// imgEdge.xy = imgMin.xy (anything < will be treated as edge case)169// imgEdge.zw = imgMax.xy (anything > will be treated as edge case)170"float4 imgEdge : register(c0);"171// value for each location in the convolution kernel:172// kernelVals[i].x = offsetX[i]173// kernelVals[i].y = offsetY[i]174// kernelVals[i].z = kernel[i]175"float3 kernelVals[%d] : register(c1);"176""177"void main(in float2 tc : TEXCOORD0,"178" inout float4 color : COLOR0)"179"{"180" float4 sum = imgEdge - tc.xyxy;"181""182" if (sum.x > 0 || sum.y > 0 || sum.z < 0 || sum.w < 0) {"183// (placeholder for edge condition code)184" color = %s;"185" } else {"186" int i;"187" sum = 
float4(0, 0, 0, 0);"188" for (i = 0; i < %d; i++) {"189" sum +="190" kernelVals[i].z *"191" tex2D(baseImage, tc + kernelVals[i].xy);"192" }"193// modulate with current color in order to apply extra alpha194" color *= sum;"195" }"196""197"}";198199/**200* Flags that can be bitwise-or'ed together to control how the shader201* source code is generated.202*/203#define CONVOLVE_EDGE_ZERO_FILL (1 << 0)204#define CONVOLVE_5X5 (1 << 1)205#define MAX_CONVOLVE (1 << 2)206207static void208D3DShaderGen_GenerateConvolveShader(int flags)209{210int kernelMax = IS_SET(CONVOLVE_5X5) ? 25 : 9;211char *edge;212char finalSource[2000];213214J2dTraceLn1(J2D_TRACE_INFO,215"D3DShaderGen_GenerateConvolveShader: flags=%d",216flags);217218if (IS_SET(CONVOLVE_EDGE_ZERO_FILL)) {219// EDGE_ZERO_FILL: fill in zero at the edges220edge = "float4(0, 0, 0, 0)";221} else {222// EDGE_NO_OP: use the source pixel color at the edges223edge = "tex2D(baseImage, tc)";224}225226// compose the final source code string from the various pieces227sprintf(finalSource, convolveShaderSource,228kernelMax, edge, kernelMax);229230D3DShaderGen_WritePixelShader(finalSource, "convolve", flags);231}232233/**************************** RescaleOp support *****************************/234235static const char *rescaleShaderSource =236// image to be rescaled237"sampler2D baseImage : register(s0);"238// vector containing scale factors239"float4 scaleFactors : register(c0);"240// vector containing offsets241"float4 offsets : register(c1);"242""243"void main(in float2 tc : TEXCOORD0,"244" inout float4 color : COLOR0)"245"{"246" float4 srcColor = tex2D(baseImage, tc);"247""248// (placeholder for un-premult code)249" %s"250""251// rescale source value252" float4 result = (srcColor * scaleFactors) + offsets;"253""254// (placeholder for re-premult code)255" %s"256""257// modulate with current color in order to apply extra alpha258" color *= result;"259"}";260261/**262* Flags that can be bitwise-or'ed together to control how the 
shader263* source code is generated.264*/265#define RESCALE_NON_PREMULT (1 << 0)266#define MAX_RESCALE (1 << 1)267268static void269D3DShaderGen_GenerateRescaleShader(int flags)270{271char *preRescale = "";272char *postRescale = "";273char finalSource[2000];274275J2dTraceLn1(J2D_TRACE_INFO,276"D3DShaderGen_GenerateRescaleShader: flags=%d",277flags);278279if (IS_SET(RESCALE_NON_PREMULT)) {280preRescale = "srcColor.rgb /= srcColor.a;";281postRescale = "result.rgb *= result.a;";282}283284// compose the final source code string from the various pieces285sprintf(finalSource, rescaleShaderSource,286preRescale, postRescale);287288D3DShaderGen_WritePixelShader(finalSource, "rescale", flags);289}290291/**************************** LookupOp support ******************************/292293static const char *lookupShaderSource =294// source image (bound to texture unit 0)295"sampler2D baseImage : register(s0);"296// lookup table (bound to texture unit 1)297"sampler2D lookupTable : register(s1);"298// offset subtracted from source index prior to lookup step299"float4 offset : register(c0);"300""301"void main(in float2 tc : TEXCOORD0,"302" inout float4 color : COLOR0)"303"{"304" float4 srcColor = tex2D(baseImage, tc);"305// (placeholder for un-premult code)306" %s"307// subtract offset from original index308" float4 srcIndex = srcColor - offset;"309// use source value as input to lookup table (note that310// "v" texcoords are hardcoded to hit texel centers of311// each row/band in texture)312" float4 result;"313" result.r = tex2D(lookupTable, float2(srcIndex.r, 0.125)).r;"314" result.g = tex2D(lookupTable, float2(srcIndex.g, 0.375)).r;"315" result.b = tex2D(lookupTable, float2(srcIndex.b, 0.625)).r;"316// (placeholder for alpha store code)317" %s"318// (placeholder for re-premult code)319" %s"320// modulate with current color in order to apply extra alpha321" color *= result;"322"}";323324/**325* Flags that can be bitwise-or'ed together to control how the shader326* source code is 
generated.327*/328#define LOOKUP_USE_SRC_ALPHA (1 << 0)329#define LOOKUP_NON_PREMULT (1 << 1)330#define MAX_LOOKUP (1 << 2)331332static void333D3DShaderGen_GenerateLookupShader(int flags)334{335char *alpha;336char *preLookup = "";337char *postLookup = "";338char finalSource[2000];339340J2dTraceLn1(J2D_TRACE_INFO,341"D3DShaderGen_GenerateLookupShader: flags=%d",342flags);343344if (IS_SET(LOOKUP_USE_SRC_ALPHA)) {345// when numComps is 1 or 3, the alpha is not looked up in the table;346// just keep the alpha from the source fragment347alpha = "result.a = srcColor.a;";348} else {349// when numComps is 4, the alpha is looked up in the table, just350// like the other color components from the source fragment351alpha = "result.a = tex2D(lookupTable, float2(srcIndex.a, 0.875)).r;";352}353if (IS_SET(LOOKUP_NON_PREMULT)) {354preLookup = "srcColor.rgb /= srcColor.a;";355postLookup = "result.rgb *= result.a;";356}357358// compose the final source code string from the various pieces359sprintf(finalSource, lookupShaderSource,360preLookup, alpha, postLookup);361362D3DShaderGen_WritePixelShader(finalSource, "lookup", flags);363}364365/************************* GradientPaint support ****************************/366367/*368* To simplify the code and to make it easier to upload a number of369* uniform values at once, we pack a bunch of scalar (float) values370* into a single float3 below. 
Here's how the values are related:371*372* params.x = p0373* params.y = p1374* params.z = p3375*/376static const char *basicGradientShaderSource =377"float3 params : register (c0);"378"float4 color1 : register (c1);"379"float4 color2 : register (c2);"380// (placeholder for mask variable)381"%s"382""383// (placeholder for mask texcoord input)384"void main(%s"385" in float4 winCoord : TEXCOORD%d,"386" inout float4 color : COLOR0)"387"{"388" float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"389" float dist = dot(params.xyz, fragCoord);"390""391// the setup code for p0/p1/p3 translates/scales to hit texel392// centers (at 0.25 and 0.75) because it is needed for the393// original/fast texture-based implementation, but it is not394// desirable for this shader-based implementation, so we395// re-transform the value here...396" dist = (dist - 0.25) * 2.0;"397""398" float fraction;"399// (placeholder for cycle code)400" %s"401""402" float4 result = lerp(color1, color2, fraction);"403""404// (placeholder for mask modulation code)405" %s"406""407// modulate with current color in order to apply extra alpha408" color *= result;"409"}";410411/**412* Flags that can be bitwise-or'ed together to control how the shader413* source code is generated.414*/415#define BASIC_GRAD_IS_CYCLIC (1 << 0)416#define BASIC_GRAD_USE_MASK (1 << 1)417#define MAX_BASIC_GRAD (1 << 2)418419static void420D3DShaderGen_GenerateBasicGradShader(int flags)421{422int colorSampler = IS_SET(BASIC_GRAD_USE_MASK) ? 
1 : 0;423char *cycleCode;424char *maskVars = "";425char *maskInput = "";426char *maskCode = "";427char finalSource[3000];428429J2dTraceLn1(J2D_TRACE_INFO,430"D3DShaderGen_GenerateBasicGradShader",431flags);432433if (IS_SET(BASIC_GRAD_IS_CYCLIC)) {434cycleCode =435"fraction = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);";436} else {437cycleCode =438"fraction = clamp(dist, 0.0, 1.0);";439}440441if (IS_SET(BASIC_GRAD_USE_MASK)) {442/*443* This code modulates the calculated result color with the444* corresponding alpha value from the alpha mask texture active445* on texture unit 0. Only needed when useMask is true (i.e., only446* for MaskFill operations).447*/448maskVars = "sampler2D mask : register(s0);";449maskInput = "in float4 maskCoord : TEXCOORD0,";450maskCode = "result *= tex2D(mask, maskCoord.xy).a;";451}452453// compose the final source code string from the various pieces454sprintf(finalSource, basicGradientShaderSource,455maskVars, maskInput, colorSampler, cycleCode, maskCode);456457D3DShaderGen_WritePixelShader(finalSource, "grad", flags);458}459460/****************** Shared MultipleGradientPaint support ********************/461462/**463* These constants are identical to those defined in the464* MultipleGradientPaint.CycleMethod enum; they are copied here for465* convenience (ideally we would pull them directly from the Java level,466* but that entails more hassle than it is worth).467*/468#define CYCLE_NONE 0469#define CYCLE_REFLECT 1470#define CYCLE_REPEAT 2471472/**473* The following constants are flags that can be bitwise-or'ed together474* to control how the MultipleGradientPaint shader source code is generated:475*476* MULTI_GRAD_CYCLE_METHOD477* Placeholder for the CycleMethod enum constant.478*479* MULTI_GRAD_LARGE480* If set, use the (slower) shader that supports a larger number of481* gradient colors; otherwise, use the optimized codepath. 
See482* the MAX_FRACTIONS_SMALL/LARGE constants below for more details.483*484* MULTI_GRAD_USE_MASK485* If set, apply the alpha mask value from texture unit 1 to the486* final color result (only used in the MaskFill case).487*488* MULTI_GRAD_LINEAR_RGB489* If set, convert the linear RGB result back into the sRGB color space.490*/491//#define MULTI_GRAD_CYCLE_METHOD (3 << 0)492#define MULTI_GRAD_LARGE (1 << 2)493#define MULTI_GRAD_USE_MASK (1 << 3)494#define MULTI_GRAD_LINEAR_RGB (1 << 4)495496// REMIND497#define MAX_MULTI_GRAD (1 << 5)498499/** Extracts the CycleMethod enum value from the given flags variable. */500//#define EXTRACT_CYCLE_METHOD(flags) \501// ((flags) & MULTI_GRAD_CYCLE_METHOD)502503/**504* The maximum number of gradient "stops" supported by the fragment shader505* and related code. When the MULTI_GRAD_LARGE flag is set, we will use506* MAX_FRACTIONS_LARGE; otherwise, we use MAX_FRACTIONS_SMALL. By having507* two separate values, we can have one highly optimized shader (SMALL) that508* supports only a few fractions/colors, and then another, less optimal509* shader that supports more stops.510*/511#define MAX_FRACTIONS 8512#define MAX_FRACTIONS_LARGE MAX_FRACTIONS513#define MAX_FRACTIONS_SMALL 4514515/**516* The maximum number of gradient colors supported by all of the gradient517* fragment shaders. Note that this value must be a power of two, as it518* determines the size of the 1D texture created below. 
It also must be519* greater than or equal to MAX_FRACTIONS (there is no strict requirement520* that the two values be equal).521*/522#define MAX_COLORS 16523524static const char *multiGradientShaderSource =525// gradient texture size (in texels)526"#define TEXTURE_SIZE %d\n"527// maximum number of fractions/colors supported by this shader528"#define MAX_FRACTIONS %d\n"529// size of a single texel530"#define FULL_TEXEL (1.0 / float(TEXTURE_SIZE))\n"531// size of half of a single texel532"#define HALF_TEXEL (FULL_TEXEL / 2.0)\n"533// texture containing the gradient colors534"sampler2D colors : register (s%d);"535// array of gradient stops/fractions and corresponding scale factors536// fractions[i].x = gradientStop[i]537// fractions[i].y = scaleFactor[i]538"float2 fractions[MAX_FRACTIONS] : register (c0);"539// (placeholder for mask variable)540"%s"541// (placeholder for Linear/RadialGP-specific variables)542"%s"543""544// (placeholder for mask texcoord input)545"void main(%s"546" in float4 winCoord : TEXCOORD%d,"547" inout float4 color : COLOR0)"548"{"549" float dist;"550// (placeholder for Linear/RadialGradientPaint-specific code)551" %s"552""553" float4 result;"554// (placeholder for CycleMethod-specific code)555" %s"556""557// (placeholder for ColorSpace conversion code)558" %s"559""560// (placeholder for mask modulation code)561" %s"562""563// modulate with current color in order to apply extra alpha564" color *= result;"565"}";566567/*568* Note: An earlier version of this code would simply calculate a single569* texcoord:570* "tc = HALF_TEXEL + (FULL_TEXEL * relFraction);"571* and then use that value to do a single texture lookup, taking advantage572* of the LINEAR texture filtering mode which in theory will do the573* appropriate linear interpolation between adjacent texels, like this:574* "float4 result = tex2D(colors, float2(tc, 0.5));"575*576* The problem with that approach is that on certain hardware (from ATI,577* notably) the LINEAR texture fetch unit has 
low precision, and would578* for instance only produce 64 distinct grayscales between white and black,579* instead of the expected 256. The visual banding caused by this issue580* is severe enough to likely cause complaints from developers, so we have581* devised a new approach below that instead manually fetches the two582* relevant neighboring texels and then performs the linear interpolation583* using the lerp() instruction (which does not suffer from the precision584* issues of the fixed-function texture filtering unit). This new approach585* requires a few more instructions and is therefore slightly slower than586* the old approach (not more than 10% or so).587*/588static const char *texCoordCalcCode =589"int i;"590"float relFraction = 0.0;"591"for (i = 0; i < MAX_FRACTIONS-1; i++) {"592" relFraction +="593" clamp((dist - fractions[i].x) * fractions[i].y, 0.0, 1.0);"594"}"595// we offset by half a texel so that we find the linearly interpolated596// color between the two texel centers of interest597"float intPart = floor(relFraction);"598"float tc1 = HALF_TEXEL + (FULL_TEXEL * intPart);"599"float tc2 = HALF_TEXEL + (FULL_TEXEL * (intPart + 1.0));"600"float4 clr1 = tex2D(colors, float2(tc1, 0.5));"601"float4 clr2 = tex2D(colors, float2(tc2, 0.5));"602"result = lerp(clr1, clr2, frac(relFraction));";603604/** Code for NO_CYCLE that gets plugged into the CycleMethod placeholder. */605static const char *noCycleCode =606"if (dist <= 0.0) {"607" result = tex2D(colors, float2(0.0, 0.5));"608"} else if (dist >= 1.0) {"609" result = tex2D(colors, float2(1.0, 0.5));"610"} else {"611// (placeholder for texcoord calculation)612" %s"613"}";614615/** Code for REFLECT that gets plugged into the CycleMethod placeholder. */616static const char *reflectCode =617"dist = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);"618// (placeholder for texcoord calculation)619"%s";620621/** Code for REPEAT that gets plugged into the CycleMethod placeholder. 
*/622static const char *repeatCode =623"dist = frac(dist);"624// (placeholder for texcoord calculation)625"%s";626627static void628D3DShaderGen_GenerateMultiGradShader(int flags, char *name,629char *paintVars, char *distCode)630{631char *maskVars = "";632char *maskInput = "";633char *maskCode = "";634char *colorSpaceCode = "";635char cycleCode[1500];636char finalSource[3000];637int colorSampler = IS_SET(MULTI_GRAD_USE_MASK) ? 1 : 0;638int cycleMethod = EXTRACT_CYCLE_METHOD(flags);639int maxFractions = IS_SET(MULTI_GRAD_LARGE) ?640MAX_FRACTIONS_LARGE : MAX_FRACTIONS_SMALL;641642J2dTraceLn(J2D_TRACE_INFO, "OGLPaints_CreateMultiGradProgram");643644if (IS_SET(MULTI_GRAD_USE_MASK)) {645/*646* This code modulates the calculated result color with the647* corresponding alpha value from the alpha mask texture active648* on texture unit 0. Only needed when useMask is true (i.e., only649* for MaskFill operations).650*/651maskVars = "sampler2D mask : register(s0);";652maskInput = "in float4 maskCoord : TEXCOORD0,";653maskCode = "result *= tex2D(mask, maskCoord.xy).a;";654}655656if (IS_SET(MULTI_GRAD_LINEAR_RGB)) {657/*658* This code converts a single pixel in linear RGB space back659* into sRGB (note: this code was adapted from the660* MultipleGradientPaintContext.convertLinearRGBtoSRGB() method).661*/662colorSpaceCode =663"result.rgb = 1.055 * pow(result.rgb, 0.416667) - 0.055;";664}665666if (cycleMethod == CYCLE_NONE) {667sprintf(cycleCode, noCycleCode, texCoordCalcCode);668} else if (cycleMethod == CYCLE_REFLECT) {669sprintf(cycleCode, reflectCode, texCoordCalcCode);670} else { // (cycleMethod == CYCLE_REPEAT)671sprintf(cycleCode, repeatCode, texCoordCalcCode);672}673674// compose the final source code string from the various pieces675sprintf(finalSource, multiGradientShaderSource,676MAX_COLORS, maxFractions, colorSampler,677maskVars, paintVars, maskInput, colorSampler,678distCode, cycleCode, colorSpaceCode, maskCode);679680D3DShaderGen_WritePixelShader(finalSource, name, 
flags);681}682683/********************** LinearGradientPaint support *************************/684685static void686D3DShaderGen_GenerateLinearGradShader(int flags)687{688char *paintVars;689char *distCode;690691J2dTraceLn1(J2D_TRACE_INFO,692"D3DShaderGen_GenerateLinearGradShader",693flags);694695/*696* To simplify the code and to make it easier to upload a number of697* uniform values at once, we pack a bunch of scalar (float) values698* into a single float3 below. Here's how the values are related:699*700* params.x = p0701* params.y = p1702* params.z = p3703*/704paintVars =705"float3 params : register(c16);";706distCode =707"float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"708"dist = dot(params.xyz, fragCoord);";709710D3DShaderGen_GenerateMultiGradShader(flags, "linear",711paintVars, distCode);712}713714/********************** RadialGradientPaint support *************************/715716static void717D3DShaderGen_GenerateRadialGradShader(int flags)718{719char *paintVars;720char *distCode;721722J2dTraceLn1(J2D_TRACE_INFO,723"D3DShaderGen_GenerateRadialGradShader",724flags);725726/*727* To simplify the code and to make it easier to upload a number of728* uniform values at once, we pack a bunch of scalar (float) values729* into float3 values below. 
Here's how the values are related:730*731* m0.x = m00732* m0.y = m01733* m0.z = m02734*735* m1.x = m10736* m1.y = m11737* m1.z = m12738*739* precalc.x = focusX740* precalc.y = 1.0 - (focusX * focusX)741* precalc.z = 1.0 / precalc.z742*/743paintVars =744"float3 m0 : register(c16);"745"float3 m1 : register(c17);"746"float3 precalc : register(c18);";747748/*749* The following code is derived from Daniel Rice's whitepaper on750* radial gradient performance (attached to the bug report for 6521533).751* Refer to that document as well as the setup code in the Java-level752* BufferedPaints.setRadialGradientPaint() method for more details.753*/754distCode =755"float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"756"float x = dot(fragCoord, m0);"757"float y = dot(fragCoord, m1);"758"float xfx = x - precalc.x;"759"dist = (precalc.x*xfx + sqrt(xfx*xfx + y*y*precalc.y))*precalc.z;";760761D3DShaderGen_GenerateMultiGradShader(flags, "radial",762paintVars, distCode);763}764765/*************************** LCD text support *******************************/766767// REMIND: Shader uses texture addressing operations in a dependency chain768// that is too complex for the target shader model (ps_2_0) to handle769// (ugh, I guess we can either require ps_3_0 or just use770// the slower pow intrinsic)771#define POW_LUT 0772773static const char *lcdTextShaderSource =774"float3 srcAdj : register(c0);"775"sampler2D glyphTex : register(s0);"776"sampler2D dstTex : register(s1);"777#if POW_LUT778"sampler3D invgammaTex : register(s2);"779"sampler3D gammaTex : register(s3);"780#else781"float3 invgamma : register(c1);"782"float3 gamma : register(c2);"783#endif784""785"void main(in float2 tc0 : TEXCOORD0,"786" in float2 tc1 : TEXCOORD1,"787" inout float4 color : COLOR0)"788"{"789// load the RGB value from the glyph image at the current texcoord790" float3 glyphClr = tex2D(glyphTex, tc0).rgb;"791" if (!any(glyphClr)) {"792// zero coverage, so skip this fragment793" discard;"794" }"795// load the 
RGB value from the corresponding destination pixel796" float3 dstClr = tex2D(dstTex, tc1).rgb;"797// gamma adjust the dest color using the invgamma LUT798#if POW_LUT799" float3 dstAdj = tex3D(invgammaTex, dstClr).rgb;"800#else801" float3 dstAdj = pow(dstClr, invgamma);"802#endif803// linearly interpolate the three color values804" float3 result = lerp(dstAdj, srcAdj, glyphClr);"805// gamma re-adjust the resulting color (alpha is always set to 1.0)806#if POW_LUT807" color = float4(tex3D(gammaTex, result).rgb, 1.0);"808#else809" color = float4(pow(result, gamma), 1.0);"810#endif811"}";812813static void814D3DShaderGen_GenerateLCDTextShader()815{816J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateLCDTextShader");817818D3DShaderGen_WritePixelShader((char *)lcdTextShaderSource, "lcdtext", 0);819}820821/*************************** AA support *******************************/822823/*824* This shader fills the space between an outer and inner parallelogram.825* It can be used to draw an outline by specifying both inner and outer826* values. It fills pixels by estimating what portion falls inside the827* outer shape, and subtracting an estimate of what portion falls inside828* the inner shape. Specifying both inner and outer values produces a829* standard "wide outline". 
Specifying an inner shape that falls far830* outside the outer shape allows the same shader to fill the outer831* shape entirely since pixels that fall within the outer shape are never832* inside the inner shape and so they are filled based solely on their833* coverage of the outer shape.834*835* The setup code renders this shader over the bounds of the outer836* shape (or the only shape in the case of a fill operation) and837* sets the texture 0 coordinates so that 0,0=>0,1=>1,1=>1,0 in those838* texture coordinates map to the four corners of the parallelogram.839* Similarly the texture 1 coordinates map the inner shape to the840* unit square as well, but in a different coordinate system.841*842* When viewed in the texture coordinate systems the parallelograms843* we are filling are unit squares, but the pixels have then become844* tiny parallelograms themselves. Both of the texture coordinate845* systems are affine transforms so the rate of change in X and Y846* of the texture coordinates are essentially constants and happen847* to correspond to the size and direction of the slanted sides of848* the distorted pixels relative to the "square mapped" boundary849* of the parallelograms.850*851* The shader uses the ddx() and ddy() functions to measure the "rate852* of change" of these texture coordinates and thus gets an accurate853* measure of the size and shape of a pixel relative to the two854* parallelograms. It then uses the bounds of the size and shape855* of a pixel to intersect with the unit square to estimate the856* coverage of the pixel. 
Unfortunately, without a lot more work857* to calculate the exact area of intersection between a unit858* square (the original parallelogram) and a parallelogram (the859* distorted pixel), this shader only approximates the pixel860* coverage, but emperically the estimate is very useful and861* produces visually pleasing results, if not theoretically accurate.862*/863static const char *aaShaderSource =864"void main(in float2 tco : TEXCOORD0,"865" in float2 tci : TEXCOORD1,"866" inout float4 color : COLOR0)"867"{"868// Calculate the vectors for the "legs" of the pixel parallelogram869// for the outer parallelogram.870" float2 oleg1 = ddx(tco);"871" float2 oleg2 = ddy(tco);"872// Calculate the bounds of the distorted pixel parallelogram.873" float2 omin = min(tco, tco+oleg1);"874" omin = min(omin, tco+oleg2);"875" omin = min(omin, tco+oleg1+oleg2);"876" float2 omax = max(tco, tco+oleg1);"877" omax = max(omax, tco+oleg2);"878" omax = max(omax, tco+oleg1+oleg2);"879// Calculate the vectors for the "legs" of the pixel parallelogram880// for the inner parallelogram.881" float2 ileg1 = ddx(tci);"882" float2 ileg2 = ddy(tci);"883// Calculate the bounds of the distorted pixel parallelogram.884" float2 imin = min(tci, tci+ileg1);"885" imin = min(imin, tci+ileg2);"886" imin = min(imin, tci+ileg1+ileg2);"887" float2 imax = max(tci, tci+ileg1);"888" imax = max(imax, tci+ileg2);"889" imax = max(imax, tci+ileg1+ileg2);"890// Clamp the bounds of the parallelograms to the unit square to891// estimate the intersection of the pixel parallelogram with892// the unit square. 
The ratio of the 2 rectangle areas is a893// reasonable estimate of the proportion of coverage.894" float2 o1 = clamp(omin, 0.0, 1.0);"895" float2 o2 = clamp(omax, 0.0, 1.0);"896" float oint = (o2.y-o1.y)*(o2.x-o1.x);"897" float oarea = (omax.y-omin.y)*(omax.x-omin.x);"898" float2 i1 = clamp(imin, 0.0, 1.0);"899" float2 i2 = clamp(imax, 0.0, 1.0);"900" float iint = (i2.y-i1.y)*(i2.x-i1.x);"901" float iarea = (imax.y-imin.y)*(imax.x-imin.x);"902// Proportion of pixel in outer shape minus the proportion903// of pixel in the inner shape == the coverage of the pixel904// in the area between the two.905" float coverage = oint/oarea - iint / iarea;"906" color *= coverage;"907"}";908909static void910D3DShaderGen_GenerateAAParallelogramShader()911{912J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateAAParallelogramShader");913914D3DShaderGen_WriteShader((char *)aaShaderSource, "ps_2_a", "aapgram", 0);915}916917/**************************** Main entrypoint *******************************/918919static void920D3DShaderGen_GenerateAllShaders()921{922int i;923924#if 1925// Generate BufferedImageOp shaders926for (i = 0; i < MAX_RESCALE; i++) {927D3DShaderGen_GenerateRescaleShader(i);928}929D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);930for (i = 0; i < MAX_CONVOLVE; i++) {931D3DShaderGen_GenerateConvolveShader(i);932}933D3DShaderGen_WriteShaderArray("convolve", MAX_CONVOLVE);934for (i = 0; i < MAX_LOOKUP; i++) {935D3DShaderGen_GenerateLookupShader(i);936}937D3DShaderGen_WriteShaderArray("lookup", MAX_LOOKUP);938939// Generate Paint shaders940for (i = 0; i < MAX_BASIC_GRAD; i++) {941D3DShaderGen_GenerateBasicGradShader(i);942}943D3DShaderGen_WriteShaderArray("grad", MAX_BASIC_GRAD);944for (i = 0; i < MAX_MULTI_GRAD; i++) {945if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND946D3DShaderGen_GenerateLinearGradShader(i);947}948D3DShaderGen_WriteShaderArray("linear", MAX_MULTI_GRAD);949for (i = 0; i < MAX_MULTI_GRAD; i++) {950if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // 
REMIND951D3DShaderGen_GenerateRadialGradShader(i);952}953D3DShaderGen_WriteShaderArray("radial", MAX_MULTI_GRAD);954955// Generate LCD text shader956D3DShaderGen_GenerateLCDTextShader();957958// Genereate Shader to fill Antialiased parallelograms959D3DShaderGen_GenerateAAParallelogramShader();960#else961/*962for (i = 0; i < MAX_RESCALE; i++) {963D3DShaderGen_GenerateRescaleShader(i);964}965D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);966*/967//D3DShaderGen_GenerateConvolveShader(2);968//D3DShaderGen_GenerateLCDTextShader();969//D3DShaderGen_GenerateLinearGradShader(16);970D3DShaderGen_GenerateBasicGradShader(0);971#endif972}973974int975main(int argc, char **argv)976{977fpHeader = fopen(strHeaderFile, "a");978979D3DShaderGen_GenerateAllShaders();980981fclose(fpHeader);982983return 0;984}985986987