Path: blob/21.2-virgl/src/gallium/auxiliary/vl/vl_idct.c
4565 views
/**************************************************************************1*2* Copyright 2010 Christian König3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include <assert.h>2829#include "pipe/p_context.h"30#include "pipe/p_screen.h"3132#include "util/u_draw.h"33#include "util/u_sampler.h"34#include "util/u_memory.h"3536#include "tgsi/tgsi_ureg.h"3738#include "vl_defines.h"39#include "vl_types.h"40#include "vl_vertex_buffers.h"41#include "vl_idct.h"4243enum VS_OUTPUT44{45VS_O_VPOS = 0,46VS_O_L_ADDR0 = 0,47VS_O_L_ADDR1,48VS_O_R_ADDR0,49VS_O_R_ADDR150};5152/**53* The DCT matrix stored as hex representation of floats. Equal to the following equation:54* for (i = 0; i < 8; ++i)55* for (j = 0; j < 8; ++j)56* if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);57* else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));58*/59static const uint32_t const_matrix[8][8] = {60{ 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },61{ 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },62{ 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },63{ 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },64{ 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },65{ 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },66{ 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },67{ 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },68};6970static void71calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],72struct ureg_src tc, struct ureg_src start, bool right_side,73bool transposed, float size)74{75unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;76unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;7778unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;79unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;8081/*82* addr[0..1].(start) = right_side ? start.x : tc.x83* addr[0..1].(tc) = right_side ? tc.y : start.y84* addr[0..1].z = tc.z85* addr[1].(start) += 1.0f / scale86*/87ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));88ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));8990ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));91ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));92}9394static void95increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],96struct ureg_src saddr[2], bool right_side, bool transposed,97int pos, float size)98{99unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;100unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;101102/*103* daddr[0..1].(start) = saddr[0..1].(start)104* daddr[0..1].(tc) = saddr[0..1].(tc)105*/106107ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);108ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));109ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);110ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));111}112113static void114fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],115struct ureg_src sampler, bool resource3d)116{117ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);118ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);119}120121static void122matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])123{124struct ureg_dst tmp;125126tmp = ureg_DECL_temporary(shader);127128/*129* tmp.xy = dot4(m[0][0..1], m[1][0..1])130* dst = tmp.x + tmp.y131*/132ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));133ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));134ureg_ADD(shader, dst,135ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),136ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));137138ureg_release_temporary(shader, tmp);139}140141static void *142create_mismatch_vert_shader(struct vl_idct *idct)143{144struct ureg_program *shader;145struct ureg_src vpos;146struct ureg_src scale;147struct ureg_dst t_tex;148struct ureg_dst o_vpos, o_addr[2];149150shader = ureg_create(PIPE_SHADER_VERTEX);151if (!shader)152return NULL;153154vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);155156t_tex = ureg_DECL_temporary(shader);157158o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);159160o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);161o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);162163/*164* scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)165*166* t_vpos = vpos + 7 / VL_BLOCK_WIDTH167* o_vpos.xy = t_vpos * scale168*169* o_addr = calc_addr(...)170*171*/172173scale = ureg_imm2f(shader,174(float)VL_BLOCK_WIDTH / idct->buffer_width,175(float)VL_BLOCK_HEIGHT / idct->buffer_height);176177ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);178ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));179180ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);181calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);182183ureg_release_temporary(shader, t_tex);184185ureg_END(shader);186187return ureg_create_shader_and_destroy(shader, idct->pipe);188}189190static void *191create_mismatch_frag_shader(struct vl_idct *idct)192{193struct ureg_program *shader;194195struct ureg_src addr[2];196197struct ureg_dst m[8][2];198struct ureg_dst fragment;199200unsigned i;201202shader = ureg_create(PIPE_SHADER_FRAGMENT);203if (!shader)204return NULL;205206addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);207addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);208209fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);210211for (i = 0; i < 8; ++i) {212m[i][0] = ureg_DECL_temporary(shader);213m[i][1] = ureg_DECL_temporary(shader);214}215216for (i = 0; i < 8; ++i) {217increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);218}219220for (i = 0; i < 8; ++i) {221struct ureg_src s_addr[2];222s_addr[0] = ureg_src(m[i][0]);223s_addr[1] = ureg_src(m[i][1]);224fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);225}226227for (i = 1; i < 8; ++i) {228ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));229ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));230}231232ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));233ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));234235ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));236ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));237ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));238239ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),240ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));241ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),242ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));243244ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));245ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));246247for (i = 0; i < 8; ++i) {248ureg_release_temporary(shader, m[i][0]);249ureg_release_temporary(shader, m[i][1]);250}251252ureg_END(shader);253254return ureg_create_shader_and_destroy(shader, idct->pipe);255}256257static void *258create_stage1_vert_shader(struct vl_idct *idct)259{260struct ureg_program *shader;261struct ureg_src vrect, vpos;262struct ureg_src scale;263struct ureg_dst t_tex, t_start;264struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];265266shader = ureg_create(PIPE_SHADER_VERTEX);267if (!shader)268return NULL;269270vrect = ureg_DECL_vs_input(shader, VS_I_RECT);271vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);272273t_tex = ureg_DECL_temporary(shader);274t_start = ureg_DECL_temporary(shader);275276o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);277278o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);279o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);280281o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);282o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);283284/*285* scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)286*287* t_vpos = vpos + vrect288* o_vpos.xy = t_vpos * scale289* o_vpos.zw = vpos290*291* o_l_addr = calc_addr(...)292* o_r_addr = calc_addr(...)293*294*/295296scale = ureg_imm2f(shader,297(float)VL_BLOCK_WIDTH / idct->buffer_width,298(float)VL_BLOCK_HEIGHT / idct->buffer_height);299300ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);301ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);302303ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));304ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));305306ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);307308calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);309calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);310311ureg_release_temporary(shader, t_tex);312ureg_release_temporary(shader, t_start);313314ureg_END(shader);315316return ureg_create_shader_and_destroy(shader, idct->pipe);317}318319static void *320create_stage1_frag_shader(struct vl_idct *idct)321{322struct ureg_program *shader;323struct ureg_src l_addr[2], r_addr[2];324struct ureg_dst l[4][2], r[2];325struct ureg_dst *fragment;326unsigned i;327int j;328329shader = ureg_create(PIPE_SHADER_FRAGMENT);330if (!shader)331return NULL;332333fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));334335l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);336l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);337338r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);339r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);340341for (i = 0; i < idct->nr_of_render_targets; ++i)342fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);343344for (i = 0; i < 4; ++i) {345l[i][0] = ureg_DECL_temporary(shader);346l[i][1] = ureg_DECL_temporary(shader);347}348349r[0] = ureg_DECL_temporary(shader);350r[1] = ureg_DECL_temporary(shader);351352for (i = 0; i < 4; ++i) {353increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);354}355356for (i = 0; i < 4; ++i) {357struct ureg_src s_addr[2];358s_addr[0] = ureg_src(l[i][0]);359s_addr[1] = ureg_src(l[i][1]);360fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);361}362363for (i = 0; i < idct->nr_of_render_targets; ++i) {364struct ureg_src s_addr[2];365366increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);367368s_addr[0] = ureg_src(r[0]);369s_addr[1] = ureg_src(r[1]);370fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);371372for (j = 0; j < 4; ++j) {373matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);374}375}376377for (i = 0; i < 4; ++i) {378ureg_release_temporary(shader, l[i][0]);379ureg_release_temporary(shader, l[i][1]);380}381ureg_release_temporary(shader, r[0]);382ureg_release_temporary(shader, r[1]);383384ureg_END(shader);385386FREE(fragment);387388return ureg_create_shader_and_destroy(shader, idct->pipe);389}390391void392vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,393unsigned first_output, struct ureg_dst tex)394{395struct ureg_src vrect, vpos;396struct ureg_src scale;397struct ureg_dst t_start;398struct ureg_dst o_l_addr[2], o_r_addr[2];399400vrect = ureg_DECL_vs_input(shader, VS_I_RECT);401vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);402403t_start = ureg_DECL_temporary(shader);404405--first_output;406407o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);408o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);409410o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);411o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);412413scale = ureg_imm2f(shader,414(float)VL_BLOCK_WIDTH / idct->buffer_width,415(float)VL_BLOCK_HEIGHT / idct->buffer_height);416417ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),418ureg_scalar(vrect, TGSI_SWIZZLE_X),419ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));420ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);421422calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);423calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);424425ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));426ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));427}428429void430vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,431unsigned first_input, struct ureg_dst fragment)432{433struct ureg_src l_addr[2], r_addr[2];434435struct ureg_dst l[2], r[2];436437--first_input;438439l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);440l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);441442r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);443r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);444445l[0] = ureg_DECL_temporary(shader);446l[1] = ureg_DECL_temporary(shader);447r[0] = ureg_DECL_temporary(shader);448r[1] = ureg_DECL_temporary(shader);449450fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);451fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);452453matrix_mul(shader, fragment, l, r);454455ureg_release_temporary(shader, l[0]);456ureg_release_temporary(shader, l[1]);457ureg_release_temporary(shader, r[0]);458ureg_release_temporary(shader, r[1]);459}460461static bool462init_shaders(struct vl_idct *idct)463{464idct->vs_mismatch = create_mismatch_vert_shader(idct);465if (!idct->vs_mismatch)466goto error_vs_mismatch;467468idct->fs_mismatch = create_mismatch_frag_shader(idct);469if (!idct->fs_mismatch)470goto error_fs_mismatch;471472idct->vs = create_stage1_vert_shader(idct);473if (!idct->vs)474goto error_vs;475476idct->fs = create_stage1_frag_shader(idct);477if (!idct->fs)478goto error_fs;479480return true;481482error_fs:483idct->pipe->delete_vs_state(idct->pipe, idct->vs);484485error_vs:486idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);487488error_fs_mismatch:489idct->pipe->delete_vs_state(idct->pipe, idct->fs);490491error_vs_mismatch:492return false;493}494495static void496cleanup_shaders(struct vl_idct *idct)497{498idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);499idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);500idct->pipe->delete_vs_state(idct->pipe, idct->vs);501idct->pipe->delete_fs_state(idct->pipe, idct->fs);502}503504static bool505init_state(struct vl_idct *idct)506{507struct pipe_blend_state blend;508struct pipe_rasterizer_state rs_state;509struct pipe_sampler_state sampler;510unsigned i;511512assert(idct);513514memset(&rs_state, 0, sizeof(rs_state));515rs_state.point_size = 1;516rs_state.half_pixel_center = true;517rs_state.bottom_edge_rule = true;518rs_state.depth_clip_near = 1;519rs_state.depth_clip_far = 1;520521idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);522if (!idct->rs_state)523goto error_rs_state;524525memset(&blend, 0, sizeof blend);526527blend.independent_blend_enable = 0;528blend.rt[0].blend_enable = 0;529blend.rt[0].rgb_func = PIPE_BLEND_ADD;530blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;531blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;532blend.rt[0].alpha_func = PIPE_BLEND_ADD;533blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;534blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;535blend.logicop_enable = 0;536blend.logicop_func = PIPE_LOGICOP_CLEAR;537/* Needed to allow color writes to FB, even if blending disabled */538blend.rt[0].colormask = PIPE_MASK_RGBA;539blend.dither = 0;540idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);541if (!idct->blend)542goto error_blend;543544for (i = 0; i < 2; ++i) {545memset(&sampler, 0, sizeof(sampler));546sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;547sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;548sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;549sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;550sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;551sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;552sampler.compare_mode = PIPE_TEX_COMPARE_NONE;553sampler.compare_func = PIPE_FUNC_ALWAYS;554sampler.normalized_coords = 1;555idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);556if (!idct->samplers[i])557goto error_samplers;558}559560return true;561562error_samplers:563for (i = 0; i < 2; ++i)564if (idct->samplers[i])565idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);566567idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);568569error_blend:570idct->pipe->delete_blend_state(idct->pipe, idct->blend);571572error_rs_state:573return false;574}575576static void577cleanup_state(struct vl_idct *idct)578{579unsigned i;580581for (i = 0; i < 2; ++i)582idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);583584idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);585idct->pipe->delete_blend_state(idct->pipe, idct->blend);586}587588static bool589init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)590{591struct pipe_resource *tex;592struct pipe_surface surf_templ;593594assert(idct && buffer);595596tex = buffer->sampler_views.individual.source->texture;597598buffer->fb_state_mismatch.width = tex->width0;599buffer->fb_state_mismatch.height = tex->height0;600buffer->fb_state_mismatch.nr_cbufs = 1;601602memset(&surf_templ, 0, sizeof(surf_templ));603surf_templ.format = tex->format;604surf_templ.u.tex.first_layer = 0;605surf_templ.u.tex.last_layer = 0;606buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);607608buffer->viewport_mismatch.scale[0] = tex->width0;609buffer->viewport_mismatch.scale[1] = tex->height0;610buffer->viewport_mismatch.scale[2] = 1;611buffer->viewport_mismatch.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;612buffer->viewport_mismatch.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;613buffer->viewport_mismatch.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;614buffer->viewport_mismatch.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;615616return true;617}618619static void620cleanup_source(struct vl_idct_buffer *buffer)621{622assert(buffer);623624pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);625626pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);627}628629static bool630init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)631{632struct pipe_resource *tex;633struct pipe_surface surf_templ;634unsigned i;635636assert(idct && buffer);637638tex = buffer->sampler_views.individual.intermediate->texture;639640buffer->fb_state.width = tex->width0;641buffer->fb_state.height = tex->height0;642buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;643for(i = 0; i < idct->nr_of_render_targets; ++i) {644memset(&surf_templ, 0, sizeof(surf_templ));645surf_templ.format = tex->format;646surf_templ.u.tex.first_layer = i;647surf_templ.u.tex.last_layer = i;648buffer->fb_state.cbufs[i] = idct->pipe->create_surface(649idct->pipe, tex, &surf_templ);650651if (!buffer->fb_state.cbufs[i])652goto error_surfaces;653}654655buffer->viewport.scale[0] = tex->width0;656buffer->viewport.scale[1] = tex->height0;657buffer->viewport.scale[2] = 1;658buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;659buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;660buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;661buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;662663return true;664665error_surfaces:666for(i = 0; i < idct->nr_of_render_targets; ++i)667pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);668669return false;670}671672static void673cleanup_intermediate(struct vl_idct_buffer *buffer)674{675unsigned i;676677assert(buffer);678679for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)680pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);681682pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);683}684685struct pipe_sampler_view *686vl_idct_upload_matrix(struct pipe_context *pipe, float scale)687{688struct pipe_resource tex_templ, *matrix;689struct pipe_sampler_view sv_templ, *sv;690struct pipe_transfer *buf_transfer;691unsigned i, j, pitch;692float *f;693694struct pipe_box rect =695{6960, 0, 0,697VL_BLOCK_WIDTH / 4,698VL_BLOCK_HEIGHT,6991700};701702assert(pipe);703704memset(&tex_templ, 0, sizeof(tex_templ));705tex_templ.target = PIPE_TEXTURE_2D;706tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;707tex_templ.last_level = 0;708tex_templ.width0 = 2;709tex_templ.height0 = 8;710tex_templ.depth0 = 1;711tex_templ.array_size = 1;712tex_templ.usage = PIPE_USAGE_IMMUTABLE;713tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;714tex_templ.flags = 0;715716matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);717if (!matrix)718goto error_matrix;719720f = pipe->texture_map(pipe, matrix, 0,721PIPE_MAP_WRITE |722PIPE_MAP_DISCARD_RANGE,723&rect, &buf_transfer);724if (!f)725goto error_map;726727pitch = buf_transfer->stride / sizeof(float);728729for(i = 0; i < VL_BLOCK_HEIGHT; ++i)730for(j = 0; j < VL_BLOCK_WIDTH; ++j)731// transpose and scale732f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;733734pipe->texture_unmap(pipe, buf_transfer);735736memset(&sv_templ, 0, sizeof(sv_templ));737u_sampler_view_default_template(&sv_templ, matrix, matrix->format);738sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);739pipe_resource_reference(&matrix, NULL);740if (!sv)741goto error_map;742743return sv;744745error_map:746pipe_resource_reference(&matrix, NULL);747748error_matrix:749return NULL;750}751752bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,753unsigned buffer_width, unsigned buffer_height,754unsigned nr_of_render_targets,755struct pipe_sampler_view *matrix,756struct pipe_sampler_view *transpose)757{758assert(idct && pipe);759assert(matrix && transpose);760761idct->pipe = pipe;762idct->buffer_width = buffer_width;763idct->buffer_height = buffer_height;764idct->nr_of_render_targets = nr_of_render_targets;765766pipe_sampler_view_reference(&idct->matrix, matrix);767pipe_sampler_view_reference(&idct->transpose, transpose);768769if(!init_shaders(idct))770return false;771772if(!init_state(idct)) {773cleanup_shaders(idct);774return false;775}776777return true;778}779780void781vl_idct_cleanup(struct vl_idct *idct)782{783cleanup_shaders(idct);784cleanup_state(idct);785786pipe_sampler_view_reference(&idct->matrix, NULL);787pipe_sampler_view_reference(&idct->transpose, NULL);788}789790bool791vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,792struct pipe_sampler_view *source,793struct pipe_sampler_view *intermediate)794{795assert(buffer && idct);796assert(source && intermediate);797798memset(buffer, 0, sizeof(struct vl_idct_buffer));799800pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);801pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);802pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);803pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);804805if (!init_source(idct, buffer))806return false;807808if (!init_intermediate(idct, buffer))809return false;810811return true;812}813814void815vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)816{817assert(buffer);818819cleanup_source(buffer);820cleanup_intermediate(buffer);821822pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);823pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);824}825826void827vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)828{829assert(buffer);830831idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);832idct->pipe->bind_blend_state(idct->pipe, idct->blend);833834idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,8350, 2, idct->samplers);836837idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0,838buffer->sampler_views.stage[0]);839840/* mismatch control */841idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);842idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);843idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);844idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);845util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);846847/* first stage */848idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);849idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);850idct->pipe->bind_vs_state(idct->pipe, idct->vs);851idct->pipe->bind_fs_state(idct->pipe, idct->fs);852util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);853}854855void856vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)857{858assert(buffer);859860/* second stage */861idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);862idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,8630, 2, idct->samplers);864idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT,8650, 2, 0, buffer->sampler_views.stage[1]);866}867868869870