Path: blob/21.2-virgl/src/gallium/auxiliary/vl/vl_compositor_cs.c
4565 views
/**************************************************************************1*2* Copyright 2019 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25* Authors: James Zhu <james.zhu<@amd.com>26*27**************************************************************************/2829#include <assert.h>3031#include "tgsi/tgsi_text.h"32#include "vl_compositor_cs.h"3334struct cs_viewport {35float scale_x;36float scale_y;37struct u_rect area;38int translate_x;39int translate_y;40float sampler0_w;41float sampler0_h;42};4344const char *compute_shader_video_buffer =45"COMP\n"46"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"47"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"48"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"4950"DCL SV[0], THREAD_ID\n"51"DCL SV[1], BLOCK_ID\n"5253"DCL CONST[0..6]\n"54"DCL SVIEW[0..2], RECT, FLOAT\n"55"DCL SAMP[0..2]\n"5657"DCL IMAGE[0], 2D, WR\n"58"DCL TEMP[0..7]\n"5960"IMM[0] UINT32 { 8, 8, 1, 0}\n"61"IMM[1] FLT32 { 1.0, 0.0, 0.0, 0.0}\n"6263"UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"6465/* Drawn area check */66"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"67"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"68"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"69"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"70"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"7172"UIF TEMP[1].xxxx\n"73/* Translate */74"UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"75"U2F TEMP[2].xy, TEMP[2].xyyy\n"76"MUL TEMP[3].xy, TEMP[2].xyyy, CONST[6].xyyy\n"7778/* Scale */79"DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"80"DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"8182/* Fetch texels */83"TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"84"TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"85"TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"8687"MOV TEMP[4].w, IMM[1].xxxx\n"8889/* Color Space Conversion */90"DP4 TEMP[7].x, CONST[0], TEMP[4]\n"91"DP4 TEMP[7].y, CONST[1], TEMP[4]\n"92"DP4 TEMP[7].z, CONST[2], TEMP[4]\n"9394"MOV TEMP[5].w, TEMP[4].zzzz\n"95"SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"96"SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"9798"MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"99100"STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"101"ENDIF\n"102103"END\n";104105const char *compute_shader_weave =106"COMP\n"107"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"108"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"109"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"110111"DCL SV[0], THREAD_ID\n"112"DCL SV[1], BLOCK_ID\n"113114"DCL CONST[0..5]\n"115"DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"116"DCL SAMP[0..2]\n"117118"DCL IMAGE[0], 2D, WR\n"119"DCL TEMP[0..15]\n"120121"IMM[0] UINT32 { 8, 8, 1, 0}\n"122"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"123"IMM[2] UINT32 { 1, 2, 4, 0}\n"124"IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"125126"UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"127128/* Drawn area check */129"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"130"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"131"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"132"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"133"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"134135"UIF TEMP[1].xxxx\n"136"MOV TEMP[2].xy, TEMP[0].xyyy\n"137/* Translate */138"UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"139140/* Top Y */141"U2F TEMP[2].xy, TEMP[2].xyyy\n"142"DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"143/* Down Y */144"MOV TEMP[12].xy, TEMP[2].xyyy\n"145146/* Top UV */147"MOV TEMP[3].xy, TEMP[2].xyyy\n"148"DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"149/* Down UV */150"MOV TEMP[13].xy, TEMP[3].xyyy\n"151152/* Texture offset */153"ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"154"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"155"ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"156"ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"157158"ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"159"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"160"ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"161"ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"162163/* Scale */164"DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"165"DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"166"DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"167"DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"168169/* Weave offset */170"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"171"ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"172"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"173"ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"174175/* Texture layer */176"MOV TEMP[14].x, TEMP[2].yyyy\n"177"MOV TEMP[14].yz, TEMP[3].yyyy\n"178"ROUND TEMP[15].xyz, TEMP[14].xyzz\n"179"ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n"180"MOV TEMP[14].xyz, |TEMP[14].xyzz|\n"181"MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n"182183/* Normalize */184"DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n"185"DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n"186"DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"187"DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n"188"DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n"189190/* Fetch texels */191"MOV TEMP[2].z, IMM[1].wwww\n"192"MOV TEMP[3].z, IMM[1].wwww\n"193"TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n"194"TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n"195"TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n"196197"MOV TEMP[12].z, IMM[1].xxxx\n"198"MOV TEMP[13].z, IMM[1].xxxx\n"199"TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n"200"TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n"201"TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n"202203"LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n"204"MOV TEMP[6].w, IMM[1].xxxx\n"205206/* Color Space Conversion */207"DP4 TEMP[9].x, CONST[0], TEMP[6]\n"208"DP4 TEMP[9].y, CONST[1], TEMP[6]\n"209"DP4 TEMP[9].z, CONST[2], TEMP[6]\n"210211"MOV TEMP[7].w, TEMP[6].zzzz\n"212"SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n"213"SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n"214215"MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n"216217"STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n"218"ENDIF\n"219220"END\n";221222const char *compute_shader_rgba =223"COMP\n"224"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"225"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"226"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"227228"DCL SV[0], THREAD_ID\n"229"DCL SV[1], BLOCK_ID\n"230231"DCL CONST[0..5]\n"232"DCL SVIEW[0], RECT, FLOAT\n"233"DCL SAMP[0]\n"234235"DCL IMAGE[0], 2D, WR\n"236"DCL TEMP[0..3]\n"237238"IMM[0] UINT32 { 8, 8, 1, 0}\n"239"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"240241"UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"242243/* Drawn area check */244"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"245"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"246"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"247"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"248"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"249250"UIF TEMP[1].xxxx\n"251/* Translate */252"UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"253"U2F TEMP[2].xy, TEMP[2].xyyy\n"254255/* Scale */256"DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"257258/* Fetch texels */259"TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n"260261"STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n"262"ENDIF\n"263264"END\n";265266static const char *compute_shader_yuv_weave_y =267"COMP\n"268"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"269"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"270"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"271272"DCL SV[0], THREAD_ID\n"273"DCL SV[1], BLOCK_ID\n"274275"DCL CONST[0..5]\n"276"DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"277"DCL SAMP[0..2]\n"278279"DCL IMAGE[0], 2D, WR\n"280"DCL TEMP[0..15]\n"281282"IMM[0] UINT32 { 8, 8, 1, 0}\n"283"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"284"IMM[2] UINT32 { 1, 2, 4, 0}\n"285"IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"286287"UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"288289/* Drawn area check */290"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"291"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"292"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"293"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"294"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"295296"UIF TEMP[1]\n"297"MOV TEMP[2], TEMP[0]\n"298/* Translate */299"UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"300301/* Top Y */302"U2F TEMP[2], TEMP[2]\n"303"DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"304/* Down Y */305"MOV TEMP[12], TEMP[2]\n"306307/* Top UV */308"MOV TEMP[3], TEMP[2]\n"309"DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"310/* Down UV */311"MOV TEMP[13], TEMP[3]\n"312313/* Texture offset */314"ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"315"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"316"ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"317"ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"318319"ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"320"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"321"ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"322"ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"323324/* Scale */325"DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"326"DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"327"DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"328"DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"329330/* Weave offset */331"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"332"ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"333"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"334"ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"335336/* Texture layer */337"MOV TEMP[14].x, TEMP[2].yyyy\n"338"MOV TEMP[14].yz, TEMP[3].yyyy\n"339"ROUND TEMP[15], TEMP[14]\n"340"ADD TEMP[14], TEMP[14], -TEMP[15]\n"341"MOV TEMP[14], |TEMP[14]|\n"342"MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"343344/* Normalize */345"DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"346"DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"347"DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"348"DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"349"DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"350351/* Fetch texels */352"MOV TEMP[2].z, IMM[1].wwww\n"353"MOV TEMP[3].z, IMM[1].wwww\n"354"TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"355"TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"356"TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"357358"MOV TEMP[12].z, IMM[1].xxxx\n"359"MOV TEMP[13].z, IMM[1].xxxx\n"360"TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"361"TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"362"TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"363364"LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"365"MOV TEMP[6].w, IMM[1].xxxx\n"366367"STORE IMAGE[0], TEMP[0], TEMP[6], 2D\n"368"ENDIF\n"369370"END\n";371372static const char *compute_shader_yuv_weave_uv =373"COMP\n"374"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"375"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"376"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"377378"DCL SV[0], THREAD_ID\n"379"DCL SV[1], BLOCK_ID\n"380381"DCL CONST[0..5]\n"382"DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"383"DCL SAMP[0..2]\n"384385"DCL IMAGE[0], 2D, WR\n"386"DCL TEMP[0..15]\n"387388"IMM[0] UINT32 { 8, 8, 1, 0}\n"389"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"390"IMM[2] UINT32 { 1, 2, 4, 0}\n"391"IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"392393"UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"394395/* Drawn area check */396"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"397"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"398"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"399"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"400"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"401402"UIF TEMP[1]\n"403"MOV TEMP[2], TEMP[0]\n"404/* Translate */405"UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"406407/* Top Y */408"U2F TEMP[2], TEMP[2]\n"409"DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"410/* Down Y */411"MOV TEMP[12], TEMP[2]\n"412413/* Top UV */414"MOV TEMP[3], TEMP[2]\n"415"DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"416/* Down UV */417"MOV TEMP[13], TEMP[3]\n"418419/* Texture offset */420"ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"421"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"422"ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"423"ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"424425"ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"426"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"427"ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"428"ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"429430/* Scale */431"DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"432"DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"433"DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"434"DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"435436/* Weave offset */437"ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"438"ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"439"ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"440"ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"441442/* Texture layer */443"MOV TEMP[14].x, TEMP[2].yyyy\n"444"MOV TEMP[14].yz, TEMP[3].yyyy\n"445"ROUND TEMP[15], TEMP[14]\n"446"ADD TEMP[14], TEMP[14], -TEMP[15]\n"447"MOV TEMP[14], |TEMP[14]|\n"448"MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"449450/* Normalize */451"DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"452"DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"453"DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"454"DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"455"DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"456457/* Fetch texels */458"MOV TEMP[2].z, IMM[1].wwww\n"459"MOV TEMP[3].z, IMM[1].wwww\n"460"TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"461"TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"462"TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"463464"MOV TEMP[12].z, IMM[1].xxxx\n"465"MOV TEMP[13].z, IMM[1].xxxx\n"466"TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"467"TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"468"TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"469470"LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"471"MOV TEMP[6].w, IMM[1].xxxx\n"472473"MOV TEMP[7].xy, TEMP[6].yzww\n"474475"STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"476"ENDIF\n"477478"END\n";479480static const char *compute_shader_yuv_bob_y =481"COMP\n"482"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"483"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"484"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"485486"DCL SV[0], THREAD_ID\n"487"DCL SV[1], BLOCK_ID\n"488489"DCL CONST[0..5]\n"490"DCL SVIEW[0..2], RECT, FLOAT\n"491"DCL SAMP[0..2]\n"492493"DCL IMAGE[0], 2D, WR\n"494"DCL TEMP[0..4]\n"495496"IMM[0] UINT32 { 8, 8, 1, 0}\n"497"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"498499"UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"500501/* Drawn area check */502"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"503"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"504"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"505"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"506"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"507508"UIF TEMP[1]\n"509/* Translate */510"UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"511"U2F TEMP[2], TEMP[2]\n"512"DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"513514/* Scale */515"DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"516"DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"517"DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"518"DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"519520/* Fetch texels */521"TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"522"TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"523"TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"524525"MOV TEMP[4].w, IMM[1].xxxx\n"526527"STORE IMAGE[0], TEMP[0], TEMP[4], 2D\n"528"ENDIF\n"529530"END\n";531532static const char *compute_shader_yuv_bob_uv =533"COMP\n"534"PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"535"PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"536"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"537538"DCL SV[0], THREAD_ID\n"539"DCL SV[1], BLOCK_ID\n"540541"DCL CONST[0..5]\n"542"DCL SVIEW[0..2], RECT, FLOAT\n"543"DCL SAMP[0..2]\n"544545"DCL IMAGE[0], 2D, WR\n"546"DCL TEMP[0..5]\n"547548"IMM[0] UINT32 { 8, 8, 1, 0}\n"549"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"550551"UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"552553/* Drawn area check */554"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"555"USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"556"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"557"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"558"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"559560"UIF TEMP[1]\n"561/* Translate */562"UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"563"U2F TEMP[2], TEMP[2]\n"564"DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"565566/* Scale */567"DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"568"DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"569"DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"570"DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"571572/* Fetch texels */573"TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"574"TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"575"TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"576577"MOV TEMP[4].w, IMM[1].xxxx\n"578579"MOV TEMP[5].xy, TEMP[4].yzww\n"580581"STORE IMAGE[0], TEMP[0], TEMP[5], 2D\n"582"ENDIF\n"583584"END\n";585586static void587cs_launch(struct vl_compositor *c,588void *cs,589const struct u_rect *draw_area)590{591struct pipe_context *ctx = c->pipe;592593/* Bind the image */594struct pipe_image_view image = {0};595image.resource = c->fb_state.cbufs[0]->texture;596image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;597image.format = c->fb_state.cbufs[0]->texture->format;598599ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);600601/* Bind compute shader */602ctx->bind_compute_state(ctx, cs);603604/* Dispatch compute */605struct pipe_grid_info info = {0};606info.block[0] = 8;607info.block[1] = 8;608info.block[2] = 1;609info.grid[0] = DIV_ROUND_UP(draw_area->x1, info.block[0]);610info.grid[1] = DIV_ROUND_UP(draw_area->y1, info.block[1]);611info.grid[2] = 1;612613ctx->launch_grid(ctx, &info);614615/* Make the result visible to all clients. */616ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);617618}619620static inline struct u_rect621calc_drawn_area(struct vl_compositor_state *s,622struct vl_compositor_layer *layer)623{624struct vertex2f tl, br;625struct u_rect result;626627assert(s && layer);628629tl = layer->dst.tl;630br = layer->dst.br;631632/* Scale */633result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];634result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];635result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];636result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];637638/* Clip */639result.x0 = MAX2(result.x0, s->scissor.minx);640result.y0 = MAX2(result.y0, s->scissor.miny);641result.x1 = MIN2(result.x1, s->scissor.maxx);642result.y1 = MIN2(result.y1, s->scissor.maxy);643return result;644}645646static bool647set_viewport(struct vl_compositor_state *s,648struct cs_viewport *drawn,649struct pipe_sampler_view **samplers)650{651struct pipe_transfer *buf_transfer;652653assert(s && drawn);654655void *ptr = pipe_buffer_map(s->pipe, s->shader_params,656PIPE_MAP_READ | PIPE_MAP_WRITE,657&buf_transfer);658659if (!ptr)660return false;661662float *ptr_float = (float *)ptr;663ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2;664*ptr_float++ = drawn->scale_x;665*ptr_float++ = drawn->scale_y;666667int *ptr_int = (int *)ptr_float;668*ptr_int++ = drawn->area.x0;669*ptr_int++ = drawn->area.y0;670*ptr_int++ = drawn->area.x1;671*ptr_int++ = drawn->area.y1;672*ptr_int++ = drawn->translate_x;673*ptr_int++ = drawn->translate_y;674675ptr_float = (float *)ptr_int;676*ptr_float++ = drawn->sampler0_w;677*ptr_float++ = drawn->sampler0_h;678679/* compute_shader_video_buffer uses pixel coordinates based on the680* Y sampler dimensions. If U/V are using separate planes and are681* subsampled, we need to scale the coordinates */682if (samplers[1]) {683float h_ratio = samplers[1]->texture->width0 /684(float) samplers[0]->texture->width0;685*ptr_float++ = h_ratio;686float v_ratio = samplers[1]->texture->height0 /687(float) samplers[0]->texture->height0;688*ptr_float++ = v_ratio;689}690pipe_buffer_unmap(s->pipe, buf_transfer);691692return true;693}694695static void696draw_layers(struct vl_compositor *c,697struct vl_compositor_state *s,698struct u_rect *dirty)699{700unsigned i;701702assert(c);703704for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {705if (s->used_layers & (1 << i)) {706struct vl_compositor_layer *layer = &s->layers[i];707struct pipe_sampler_view **samplers = &layer->sampler_views[0];708unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;709struct cs_viewport drawn;710711drawn.area = calc_drawn_area(s, layer);712drawn.scale_x = layer->viewport.scale[0] /713(float)layer->sampler_views[0]->texture->width0 *714(layer->src.br.x - layer->src.tl.x);715drawn.scale_y = layer->viewport.scale[1] /716((float)layer->sampler_views[0]->texture->height0 *717(s->interlaced ? 2.0 : 1.0) *718(layer->src.br.y - layer->src.tl.y));719720drawn.translate_x = (int)layer->viewport.translate[0];721drawn.translate_y = (int)layer->viewport.translate[1];722drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;723drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;724set_viewport(s, &drawn, samplers);725726c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,727num_sampler_views, layer->samplers);728c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,729num_sampler_views, 0, samplers);730731cs_launch(c, layer->cs, &(drawn.area));732733/* Unbind. */734c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);735c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);736c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, 0,737num_sampler_views, NULL);738c->pipe->bind_compute_state(c->pipe, NULL);739c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,740num_sampler_views, NULL);741742if (dirty) {743struct u_rect drawn = calc_drawn_area(s, layer);744dirty->x0 = MIN2(drawn.x0, dirty->x0);745dirty->y0 = MIN2(drawn.y0, dirty->y0);746dirty->x1 = MAX2(drawn.x1, dirty->x1);747dirty->y1 = MAX2(drawn.y1, dirty->y1);748}749}750}751}752753void *754vl_compositor_cs_create_shader(struct vl_compositor *c,755const char *compute_shader_text)756{757assert(c && compute_shader_text);758759struct tgsi_token tokens[1024];760if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) {761assert(0);762return NULL;763}764765struct pipe_compute_state state = {0};766state.ir_type = PIPE_SHADER_IR_TGSI;767state.prog = tokens;768769/* create compute shader */770return c->pipe->create_compute_state(c->pipe, &state);771}772773void774vl_compositor_cs_render(struct vl_compositor_state *s,775struct vl_compositor *c,776struct pipe_surface *dst_surface,777struct u_rect *dirty_area,778bool clear_dirty)779{780assert(c && s);781assert(dst_surface);782783c->fb_state.width = dst_surface->width;784c->fb_state.height = dst_surface->height;785c->fb_state.cbufs[0] = dst_surface;786787if (!s->scissor_valid) {788s->scissor.minx = 0;789s->scissor.miny = 0;790s->scissor.maxx = dst_surface->width;791s->scissor.maxy = dst_surface->height;792}793794if (clear_dirty && dirty_area &&795(dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {796797c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,7980, 0, dst_surface->width, dst_surface->height, false);799dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;800dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;801}802803pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);804805draw_layers(c, s, dirty_area);806}807808bool vl_compositor_cs_init_shaders(struct vl_compositor *c)809{810assert(c);811812c->cs_video_buffer = vl_compositor_cs_create_shader(c, compute_shader_video_buffer);813if (!c->cs_video_buffer) {814debug_printf("Unable to create video_buffer compute shader.\n");815return false;816}817818c->cs_weave_rgb = vl_compositor_cs_create_shader(c, compute_shader_weave);819if (!c->cs_weave_rgb) {820debug_printf("Unable to create weave_rgb compute shader.\n");821return false;822}823824c->cs_yuv.weave.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_y);825c->cs_yuv.weave.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_uv);826c->cs_yuv.bob.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_y);827c->cs_yuv.bob.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_uv);828if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv ||829!c->cs_yuv.bob.y || !c->cs_yuv.bob.uv) {830debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");831return false;832}833834return true;835}836837void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)838{839assert(c);840841if (c->cs_video_buffer)842c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);843if (c->cs_weave_rgb)844c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);845if (c->cs_yuv.weave.y)846c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);847if (c->cs_yuv.weave.uv)848c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);849if (c->cs_yuv.bob.y)850c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.y);851if (c->cs_yuv.bob.uv)852c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.uv);853}854855856