Path: blob/21.2-virgl/src/gallium/drivers/radeon/radeon_uvd.c
4570 views
/**************************************************************************1*2* Copyright 2011 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "radeon_uvd.h"2829#include "pipe/p_video_codec.h"30#include "radeon_video.h"31#include "radeonsi/si_pipe.h"32#include "util/u_memory.h"33#include "util/u_video.h"34#include "vl/vl_defines.h"35#include "vl/vl_mpeg12_decoder.h"36#include <sys/types.h>3738#include <assert.h>39#include <errno.h>40#include <stdio.h>41#include <unistd.h>4243#define NUM_BUFFERS 44445#define NUM_MPEG2_REFS 646#define NUM_H264_REFS 1747#define NUM_VC1_REFS 54849#define FB_BUFFER_OFFSET 0x100050#define FB_BUFFER_SIZE 204851#define FB_BUFFER_SIZE_TONGA (2048 * 64)52#define IT_SCALING_TABLE_SIZE 99253#define UVD_SESSION_CONTEXT_SIZE (128 * 1024)5455/* UVD decoder representation */56struct ruvd_decoder {57struct pipe_video_codec base;5859ruvd_set_dtb set_dtb;6061unsigned stream_handle;62unsigned stream_type;63unsigned frame_number;6465struct pipe_screen *screen;66struct radeon_winsys *ws;67struct radeon_cmdbuf cs;6869unsigned cur_buffer;7071struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];72struct ruvd_msg *msg;73uint32_t *fb;74unsigned fb_size;75uint8_t *it;7677struct rvid_buffer bs_buffers[NUM_BUFFERS];78void *bs_ptr;79unsigned bs_size;8081struct rvid_buffer dpb;82bool use_legacy;83struct rvid_buffer ctx;84struct rvid_buffer sessionctx;85struct {86unsigned data0;87unsigned data1;88unsigned cmd;89unsigned cntl;90} reg;9192void *render_pic_list[16];93};9495/* flush IB to the hardware */96static int flush(struct ruvd_decoder *dec, unsigned flags)97{98return dec->ws->cs_flush(&dec->cs, flags, NULL);99}100101/* add a new set register command to the IB */102static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)103{104radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0));105radeon_emit(&dec->cs, val);106}107108/* send a command to the VCPU through the GPCOM registers */109static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off,110enum radeon_bo_usage usage, enum radeon_bo_domain domain)111{112int reloc_idx;113114reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0);115if (!dec->use_legacy) {116uint64_t addr;117addr = dec->ws->buffer_get_virtual_address(buf);118addr = addr + off;119set_reg(dec, dec->reg.data0, addr);120set_reg(dec, dec->reg.data1, addr >> 32);121} else {122off += dec->ws->buffer_get_reloc_offset(buf);123set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);124set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);125}126set_reg(dec, dec->reg.cmd, cmd << 1);127}128129/* do the codec needs an IT buffer ?*/130static bool have_it(struct ruvd_decoder *dec)131{132return dec->stream_type == RUVD_CODEC_H264_PERF || dec->stream_type == RUVD_CODEC_H265;133}134135/* map the next available message/feedback/itscaling buffer */136static void map_msg_fb_it_buf(struct ruvd_decoder *dec)137{138struct rvid_buffer *buf;139uint8_t *ptr;140141/* grab the current message/feedback buffer */142buf = &dec->msg_fb_it_buffers[dec->cur_buffer];143144/* and map it for CPU access */145ptr =146dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);147148/* calc buffer offsets */149dec->msg = (struct ruvd_msg *)ptr;150memset(dec->msg, 0, sizeof(*dec->msg));151152dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);153if (have_it(dec))154dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);155}156157/* unmap and send a message command to the VCPU */158static void send_msg_buf(struct ruvd_decoder *dec)159{160struct rvid_buffer *buf;161162/* ignore the request if message/feedback buffer isn't mapped */163if (!dec->msg || !dec->fb)164return;165166/* grab the current message buffer */167buf = &dec->msg_fb_it_buffers[dec->cur_buffer];168169/* unmap the buffer */170dec->ws->buffer_unmap(dec->ws, buf->res->buf);171dec->msg = NULL;172dec->fb = NULL;173dec->it = NULL;174175if (dec->sessionctx.res)176send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0,177RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);178179/* and send it to the hardware */180send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);181}182183/* cycle to the next set of buffers */184static void next_buffer(struct ruvd_decoder *dec)185{186++dec->cur_buffer;187dec->cur_buffer %= NUM_BUFFERS;188}189190/* convert the profile into something UVD understands */191static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)192{193switch (u_reduce_video_profile(dec->base.profile)) {194case PIPE_VIDEO_FORMAT_MPEG4_AVC:195return (family >= CHIP_TONGA) ? RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;196197case PIPE_VIDEO_FORMAT_VC1:198return RUVD_CODEC_VC1;199200case PIPE_VIDEO_FORMAT_MPEG12:201return RUVD_CODEC_MPEG2;202203case PIPE_VIDEO_FORMAT_MPEG4:204return RUVD_CODEC_MPEG4;205206case PIPE_VIDEO_FORMAT_HEVC:207return RUVD_CODEC_H265;208209case PIPE_VIDEO_FORMAT_JPEG:210return RUVD_CODEC_MJPEG;211212default:213assert(0);214return 0;215}216}217218static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec)219{220unsigned width_in_mb, height_in_mb, ctx_size;221unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);222unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);223224unsigned max_references = dec->base.max_references + 1;225226// picture width & height in 16 pixel units227width_in_mb = width / VL_MACROBLOCK_WIDTH;228height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);229230if (!dec->use_legacy) {231unsigned fs_in_mb = width_in_mb * height_in_mb;232unsigned num_dpb_buffer;233switch (dec->base.level) {234case 30:235num_dpb_buffer = 8100 / fs_in_mb;236break;237case 31:238num_dpb_buffer = 18000 / fs_in_mb;239break;240case 32:241num_dpb_buffer = 20480 / fs_in_mb;242break;243case 41:244num_dpb_buffer = 32768 / fs_in_mb;245break;246case 42:247num_dpb_buffer = 34816 / fs_in_mb;248break;249case 50:250num_dpb_buffer = 110400 / fs_in_mb;251break;252case 51:253num_dpb_buffer = 184320 / fs_in_mb;254break;255default:256num_dpb_buffer = 184320 / fs_in_mb;257break;258}259num_dpb_buffer++;260max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);261ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);262} else {263// the firmware seems to always assume a minimum of ref frames264max_references = MAX2(NUM_H264_REFS, max_references);265// macroblock context buffer266ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256);267}268269return ctx_size;270}271272static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)273{274unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);275unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);276277unsigned max_references = dec->base.max_references + 1;278279if (dec->base.width * dec->base.height >= 4096 * 2000)280max_references = MAX2(max_references, 8);281else282max_references = MAX2(max_references, 17);283284width = align(width, 16);285height = align(height, 16);286return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;287}288289static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec,290struct pipe_h265_picture_desc *pic)291{292unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;293unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;294unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);295296unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);297unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);298unsigned coeff_10bit =299(pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;300301unsigned max_references = dec->base.max_references + 1;302303if (dec->base.width * dec->base.height >= 4096 * 2000)304max_references = MAX2(max_references, 8);305else306max_references = MAX2(max_references, 17);307308log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +309pic->pps->sps->log2_diff_max_min_luma_coding_block_size;310311width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;312height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;313314num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);315context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);316max_mb_address = (unsigned)ceil(height * 8 / 2048.0);317318cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;319db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);320321return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;322}323324static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)325{326if (((struct si_screen *)dec->screen)->info.family < CHIP_VEGA10)327return 16;328else329return 32;330}331332/* calculate size of reference picture buffer */333static unsigned calc_dpb_size(struct ruvd_decoder *dec)334{335unsigned width_in_mb, height_in_mb, image_size, dpb_size;336337// always align them to MB size for dpb calculation338unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);339unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);340341// always one more for currently decoded picture342unsigned max_references = dec->base.max_references + 1;343344// aligned size of a single frame345image_size = align(width, get_db_pitch_alignment(dec)) * height;346image_size += image_size / 2;347image_size = align(image_size, 1024);348349// picture width & height in 16 pixel units350width_in_mb = width / VL_MACROBLOCK_WIDTH;351height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);352353switch (u_reduce_video_profile(dec->base.profile)) {354case PIPE_VIDEO_FORMAT_MPEG4_AVC: {355if (!dec->use_legacy) {356unsigned fs_in_mb = width_in_mb * height_in_mb;357unsigned alignment = 64, num_dpb_buffer;358359if (dec->stream_type == RUVD_CODEC_H264_PERF)360alignment = 256;361switch (dec->base.level) {362case 30:363num_dpb_buffer = 8100 / fs_in_mb;364break;365case 31:366num_dpb_buffer = 18000 / fs_in_mb;367break;368case 32:369num_dpb_buffer = 20480 / fs_in_mb;370break;371case 41:372num_dpb_buffer = 32768 / fs_in_mb;373break;374case 42:375num_dpb_buffer = 34816 / fs_in_mb;376break;377case 50:378num_dpb_buffer = 110400 / fs_in_mb;379break;380case 51:381num_dpb_buffer = 184320 / fs_in_mb;382break;383default:384num_dpb_buffer = 184320 / fs_in_mb;385break;386}387num_dpb_buffer++;388max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);389dpb_size = image_size * max_references;390if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||391(((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) {392dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);393dpb_size += align(width_in_mb * height_in_mb * 32, alignment);394}395} else {396// the firmware seems to allways assume a minimum of ref frames397max_references = MAX2(NUM_H264_REFS, max_references);398// reference picture buffer399dpb_size = image_size * max_references;400if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||401(((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) {402// macroblock context buffer403dpb_size += width_in_mb * height_in_mb * max_references * 192;404// IT surface buffer405dpb_size += width_in_mb * height_in_mb * 32;406}407}408break;409}410411case PIPE_VIDEO_FORMAT_HEVC:412if (dec->base.width * dec->base.height >= 4096 * 2000)413max_references = MAX2(max_references, 8);414else415max_references = MAX2(max_references, 17);416417width = align(width, 16);418height = align(height, 16);419if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)420dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) *421max_references;422else423dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) *424max_references;425break;426427case PIPE_VIDEO_FORMAT_VC1:428// the firmware seems to allways assume a minimum of ref frames429max_references = MAX2(NUM_VC1_REFS, max_references);430431// reference picture buffer432dpb_size = image_size * max_references;433434// CONTEXT_BUFFER435dpb_size += width_in_mb * height_in_mb * 128;436437// IT surface buffer438dpb_size += width_in_mb * 64;439440// DB surface buffer441dpb_size += width_in_mb * 128;442443// BP444dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);445break;446447case PIPE_VIDEO_FORMAT_MPEG12:448// reference picture buffer, must be big enough for all frames449dpb_size = image_size * NUM_MPEG2_REFS;450break;451452case PIPE_VIDEO_FORMAT_MPEG4:453// reference picture buffer454dpb_size = image_size * max_references;455456// CM457dpb_size += width_in_mb * height_in_mb * 64;458459// IT surface buffer460dpb_size += align(width_in_mb * height_in_mb * 32, 64);461462dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);463break;464465case PIPE_VIDEO_FORMAT_JPEG:466dpb_size = 0;467break;468469default:470// something is missing here471assert(0);472473// at least use a sane default value474dpb_size = 32 * 1024 * 1024;475break;476}477return dpb_size;478}479480/* free associated data in the video buffer callback */481static void ruvd_destroy_associated_data(void *data)482{483/* NOOP, since we only use an intptr */484}485486/* get h264 specific message bits */487static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)488{489struct ruvd_h264 result;490491memset(&result, 0, sizeof(result));492switch (pic->base.profile) {493case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:494case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:495result.profile = RUVD_H264_PROFILE_BASELINE;496break;497498case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:499result.profile = RUVD_H264_PROFILE_MAIN;500break;501502case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:503result.profile = RUVD_H264_PROFILE_HIGH;504break;505506default:507assert(0);508break;509}510511result.level = dec->base.level;512513result.sps_info_flags = 0;514result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;515result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;516result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;517result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;518519result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;520result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;521result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;522result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;523result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;524525switch (dec->base.chroma_format) {526case PIPE_VIDEO_CHROMA_FORMAT_NONE:527/* TODO: assert? */528break;529case PIPE_VIDEO_CHROMA_FORMAT_400:530result.chroma_format = 0;531break;532case PIPE_VIDEO_CHROMA_FORMAT_420:533result.chroma_format = 1;534break;535case PIPE_VIDEO_CHROMA_FORMAT_422:536result.chroma_format = 2;537break;538case PIPE_VIDEO_CHROMA_FORMAT_444:539result.chroma_format = 3;540break;541}542543result.pps_info_flags = 0;544result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;545result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;546result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;547result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;548result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;549result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;550result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;551result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;552553result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;554result.slice_group_map_type = pic->pps->slice_group_map_type;555result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;556result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;557result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;558result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;559560memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16);561memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64);562563if (dec->stream_type == RUVD_CODEC_H264_PERF) {564memcpy(dec->it, result.scaling_list_4x4, 6 * 16);565memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64);566}567568result.num_ref_frames = pic->num_ref_frames;569570result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;571result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;572573result.frame_num = pic->frame_num;574memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16);575result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];576result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];577memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2);578579result.decoded_pic_idx = pic->frame_num;580581return result;582}583584/* get h265 specific message bits */585static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,586struct pipe_h265_picture_desc *pic)587{588struct ruvd_h265 result;589unsigned i, j;590591memset(&result, 0, sizeof(result));592593result.sps_info_flags = 0;594result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;595result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;596result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;597result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;598result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;599result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;600result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;601result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;602result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;603if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)604result.sps_info_flags |= 1 << 9;605if (pic->UseRefPicList == true)606result.sps_info_flags |= 1 << 10;607608result.chroma_format = pic->pps->sps->chroma_format_idc;609result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;610result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;611result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;612result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;613result.log2_min_luma_coding_block_size_minus3 =614pic->pps->sps->log2_min_luma_coding_block_size_minus3;615result.log2_diff_max_min_luma_coding_block_size =616pic->pps->sps->log2_diff_max_min_luma_coding_block_size;617result.log2_min_transform_block_size_minus2 =618pic->pps->sps->log2_min_transform_block_size_minus2;619result.log2_diff_max_min_transform_block_size =620pic->pps->sps->log2_diff_max_min_transform_block_size;621result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;622result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;623result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;624result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;625result.log2_min_pcm_luma_coding_block_size_minus3 =626pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;627result.log2_diff_max_min_pcm_luma_coding_block_size =628pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;629result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;630631result.pps_info_flags = 0;632result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;633result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;634result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;635result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;636result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;637result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;638result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;639result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;640result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;641result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;642result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;643result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;644result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;645result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;646result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;647result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;648result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;649result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;650result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;651result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;652// result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???653654result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;655result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;656result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;657result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;658result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;659result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;660result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;661result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;662result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;663result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;664result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;665result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;666result.init_qp_minus26 = pic->pps->init_qp_minus26;667668for (i = 0; i < 19; ++i)669result.column_width_minus1[i] = pic->pps->column_width_minus1[i];670671for (i = 0; i < 21; ++i)672result.row_height_minus1[i] = pic->pps->row_height_minus1[i];673674result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;675result.curr_poc = pic->CurrPicOrderCntVal;676677for (i = 0; i < 16; i++) {678for (j = 0; (pic->ref[j] != NULL) && (j < 16); j++) {679if (dec->render_pic_list[i] == pic->ref[j])680break;681if (j == 15)682dec->render_pic_list[i] = NULL;683else if (pic->ref[j + 1] == NULL)684dec->render_pic_list[i] = NULL;685}686}687for (i = 0; i < 16; i++) {688if (dec->render_pic_list[i] == NULL) {689dec->render_pic_list[i] = target;690result.curr_idx = i;691break;692}693}694695vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx,696&ruvd_destroy_associated_data);697698for (i = 0; i < 16; ++i) {699struct pipe_video_buffer *ref = pic->ref[i];700uintptr_t ref_pic = 0;701702result.poc_list[i] = pic->PicOrderCntVal[i];703704if (ref)705ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);706else707ref_pic = 0x7F;708result.ref_pic_list[i] = ref_pic;709}710711for (i = 0; i < 8; ++i) {712result.ref_pic_set_st_curr_before[i] = 0xFF;713result.ref_pic_set_st_curr_after[i] = 0xFF;714result.ref_pic_set_lt_curr[i] = 0xFF;715}716717for (i = 0; i < pic->NumPocStCurrBefore; ++i)718result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];719720for (i = 0; i < pic->NumPocStCurrAfter; ++i)721result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];722723for (i = 0; i < pic->NumPocLtCurr; ++i)724result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];725726for (i = 0; i < 6; ++i)727result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];728729for (i = 0; i < 2; ++i)730result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];731732memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);733memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);734memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);735memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);736737for (i = 0; i < 2; i++) {738for (j = 0; j < 15; j++)739result.direct_reflist[i][j] = pic->RefPicList[i][j];740}741742if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {743if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {744result.p010_mode = 1;745result.msb_mode = 1;746} else {747result.luma_10to8 = 5;748result.chroma_10to8 = 5;749result.sclr_luma10to8 = 4;750result.sclr_chroma10to8 = 4;751}752}753754/* TODO755result.highestTid;756result.isNonRef;757758IDRPicFlag;759RAPPicFlag;760NumPocTotalCurr;761NumShortTermPictureSliceHeaderBits;762NumLongTermPictureSliceHeaderBits;763764IsLongTerm[16];765*/766767return result;768}769770/* get vc1 specific message bits */771static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)772{773struct ruvd_vc1 result;774775memset(&result, 0, sizeof(result));776777switch (pic->base.profile) {778case PIPE_VIDEO_PROFILE_VC1_SIMPLE:779result.profile = RUVD_VC1_PROFILE_SIMPLE;780result.level = 1;781break;782783case PIPE_VIDEO_PROFILE_VC1_MAIN:784result.profile = RUVD_VC1_PROFILE_MAIN;785result.level = 2;786break;787788case PIPE_VIDEO_PROFILE_VC1_ADVANCED:789result.profile = RUVD_VC1_PROFILE_ADVANCED;790result.level = 4;791break;792793default:794assert(0);795}796797/* fields common for all profiles */798result.sps_info_flags |= pic->postprocflag << 7;799result.sps_info_flags |= pic->pulldown << 6;800result.sps_info_flags |= pic->interlace << 5;801result.sps_info_flags |= pic->tfcntrflag << 4;802result.sps_info_flags |= pic->finterpflag << 3;803result.sps_info_flags |= pic->psf << 1;804805result.pps_info_flags |= pic->range_mapy_flag << 31;806result.pps_info_flags |= pic->range_mapy << 28;807result.pps_info_flags |= pic->range_mapuv_flag << 27;808result.pps_info_flags |= pic->range_mapuv << 24;809result.pps_info_flags |= pic->multires << 21;810result.pps_info_flags |= pic->maxbframes << 16;811result.pps_info_flags |= pic->overlap << 11;812result.pps_info_flags |= pic->quantizer << 9;813result.pps_info_flags |= pic->panscan_flag << 7;814result.pps_info_flags |= pic->refdist_flag << 6;815result.pps_info_flags |= pic->vstransform << 0;816817/* some fields only apply to main/advanced profile */818if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {819result.pps_info_flags |= pic->syncmarker << 20;820result.pps_info_flags |= pic->rangered << 19;821result.pps_info_flags |= pic->loopfilter << 5;822result.pps_info_flags |= pic->fastuvmc << 4;823result.pps_info_flags |= pic->extended_mv << 3;824result.pps_info_flags |= pic->extended_dmv << 8;825result.pps_info_flags |= pic->dquant << 1;826}827828result.chroma_format = 1;829830#if 0831//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)832uint32_t slice_count833uint8_t picture_type834uint8_t frame_coding_mode835uint8_t deblockEnable836uint8_t pquant837#endif838839return result;840}841842/* extract the frame number from a referenced video buffer */843static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)844{845uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;846uint32_t max = MAX2(dec->frame_number, 1) - 1;847uintptr_t frame;848849/* seems to be the most sane fallback */850if (!ref)851return max;852853/* get the frame number from the associated data */854frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);855856/* limit the frame number to a valid range */857return MAX2(MIN2(frame, max), min);858}859860/* get mpeg2 specific msg bits */861static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,862struct pipe_mpeg12_picture_desc *pic)863{864const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;865struct ruvd_mpeg2 result;866unsigned i;867868memset(&result, 0, sizeof(result));869result.decoded_pic_idx = dec->frame_number;870for (i = 0; i < 2; ++i)871result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);872873if (pic->intra_matrix) {874result.load_intra_quantiser_matrix = 1;875for (i = 0; i < 64; ++i) {876result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];877}878}879if (pic->non_intra_matrix) {880result.load_nonintra_quantiser_matrix = 1;881for (i = 0; i < 64; ++i) {882result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];883}884}885886result.profile_and_level_indication = 0;887result.chroma_format = 0x1;888889result.picture_coding_type = pic->picture_coding_type;890result.f_code[0][0] = pic->f_code[0][0] + 1;891result.f_code[0][1] = pic->f_code[0][1] + 1;892result.f_code[1][0] = pic->f_code[1][0] + 1;893result.f_code[1][1] = pic->f_code[1][1] + 1;894result.intra_dc_precision = pic->intra_dc_precision;895result.pic_structure = pic->picture_structure;896result.top_field_first = pic->top_field_first;897result.frame_pred_frame_dct = pic->frame_pred_frame_dct;898result.concealment_motion_vectors = pic->concealment_motion_vectors;899result.q_scale_type = pic->q_scale_type;900result.intra_vlc_format = pic->intra_vlc_format;901result.alternate_scan = pic->alternate_scan;902903return result;904}905906/* get mpeg4 specific msg bits */907static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,908struct pipe_mpeg4_picture_desc *pic)909{910struct ruvd_mpeg4 result;911unsigned i;912913memset(&result, 0, sizeof(result));914result.decoded_pic_idx = dec->frame_number;915for (i = 0; i < 2; ++i)916result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);917918result.variant_type = 0;919result.profile_and_level_indication = 0xF0; // ASP Level0920921result.video_object_layer_verid = 0x5; // advanced simple922result.video_object_layer_shape = 0x0; // rectangular923924result.video_object_layer_width = dec->base.width;925result.video_object_layer_height = dec->base.height;926927result.vop_time_increment_resolution = pic->vop_time_increment_resolution;928929result.flags |= pic->short_video_header << 0;930// result.flags |= obmc_disable << 1;931result.flags |= pic->interlaced << 2;932result.flags |= 1 << 3; // load_intra_quant_mat933result.flags |= 1 << 4; // load_nonintra_quant_mat934result.flags |= pic->quarter_sample << 5;935result.flags |= 1 << 6; // complexity_estimation_disable936result.flags |= pic->resync_marker_disable << 7;937// result.flags |= data_partitioned << 8;938// result.flags |= reversible_vlc << 9;939result.flags |= 0 << 10; // newpred_enable940result.flags |= 0 << 11; // reduced_resolution_vop_enable941// result.flags |= scalability << 12;942// result.flags |= is_object_layer_identifier << 13;943// result.flags |= fixed_vop_rate << 14;944// result.flags |= newpred_segment_type << 15;945946result.quant_type = pic->quant_type;947948for (i = 0; i < 64; ++i) {949result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];950result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];951}952953/*954int32_t trd [2]955int32_t trb [2]956uint8_t vop_coding_type957uint8_t vop_fcode_forward958uint8_t vop_fcode_backward959uint8_t rounding_control960uint8_t alternate_vertical_scan_flag961uint8_t top_field_first962*/963964return result;965}966967/**968* destroy this video decoder969*/970static void ruvd_destroy(struct pipe_video_codec *decoder)971{972struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;973unsigned i;974975assert(decoder);976977map_msg_fb_it_buf(dec);978dec->msg->size = sizeof(*dec->msg);979dec->msg->msg_type = RUVD_MSG_DESTROY;980dec->msg->stream_handle = dec->stream_handle;981send_msg_buf(dec);982983flush(dec, 0);984985dec->ws->cs_destroy(&dec->cs);986987for (i = 0; i < NUM_BUFFERS; ++i) {988si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);989si_vid_destroy_buffer(&dec->bs_buffers[i]);990}991992si_vid_destroy_buffer(&dec->dpb);993si_vid_destroy_buffer(&dec->ctx);994si_vid_destroy_buffer(&dec->sessionctx);995996FREE(dec);997}998999/**1000* start decoding of a new frame1001*/1002static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,1003struct pipe_picture_desc *picture)1004{1005struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;1006uintptr_t frame;10071008assert(decoder);10091010frame = ++dec->frame_number;1011vl_video_buffer_set_associated_data(target, decoder, (void *)frame,1012&ruvd_destroy_associated_data);10131014dec->bs_size = 0;1015dec->bs_ptr = dec->ws->buffer_map(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs,1016PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);1017}10181019/**1020* decode a macroblock1021*/1022static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,1023struct pipe_video_buffer *target,1024struct pipe_picture_desc *picture,1025const struct pipe_macroblock *macroblocks,1026unsigned num_macroblocks)1027{1028/* not supported (yet) */1029assert(0);1030}10311032/**1033* decode a bitstream1034*/1035static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,1036struct pipe_video_buffer *target,1037struct pipe_picture_desc *picture, unsigned num_buffers,1038const void *const *buffers, const unsigned *sizes)1039{1040struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;1041unsigned i;10421043assert(decoder);10441045if (!dec->bs_ptr)1046return;10471048for (i = 0; i < num_buffers; ++i) {1049struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];1050unsigned new_size = dec->bs_size + sizes[i];10511052if (new_size > buf->res->buf->size) {1053dec->ws->buffer_unmap(dec->ws, buf->res->buf);1054if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) {1055RVID_ERR("Can't resize bitstream buffer!");1056return;1057}10581059dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,1060PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);1061if (!dec->bs_ptr)1062return;10631064dec->bs_ptr += dec->bs_size;1065}10661067memcpy(dec->bs_ptr, buffers[i], sizes[i]);1068dec->bs_size += sizes[i];1069dec->bs_ptr += sizes[i];1070}1071}10721073/**1074* end decoding of the current frame1075*/1076static void ruvd_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,1077struct pipe_picture_desc *picture)1078{1079struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;1080struct pb_buffer *dt;1081struct rvid_buffer *msg_fb_it_buf, *bs_buf;1082unsigned bs_size;10831084assert(decoder);10851086if (!dec->bs_ptr)1087return;10881089msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];1090bs_buf = &dec->bs_buffers[dec->cur_buffer];10911092bs_size = align(dec->bs_size, 128);1093memset(dec->bs_ptr, 0, bs_size - dec->bs_size);1094dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);10951096map_msg_fb_it_buf(dec);1097dec->msg->size = sizeof(*dec->msg);1098dec->msg->msg_type = RUVD_MSG_DECODE;1099dec->msg->stream_handle = dec->stream_handle;1100dec->msg->status_report_feedback_number = dec->frame_number;11011102dec->msg->body.decode.stream_type = dec->stream_type;1103dec->msg->body.decode.decode_flags = 0x1;1104dec->msg->body.decode.width_in_samples = dec->base.width;1105dec->msg->body.decode.height_in_samples = dec->base.height;11061107if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||1108(picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {1109dec->msg->body.decode.width_in_samples =1110align(dec->msg->body.decode.width_in_samples, 16) / 16;1111dec->msg->body.decode.height_in_samples =1112align(dec->msg->body.decode.height_in_samples, 16) / 16;1113}11141115if (dec->dpb.res)1116dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;1117dec->msg->body.decode.bsd_size = bs_size;1118dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));11191120if (dec->stream_type == RUVD_CODEC_H264_PERF &&1121((struct si_screen *)dec->screen)->info.family >= CHIP_POLARIS10)1122dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;11231124dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);1125if (((struct si_screen *)dec->screen)->info.family >= CHIP_STONEY)1126dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;11271128switch (u_reduce_video_profile(picture->profile)) {1129case PIPE_VIDEO_FORMAT_MPEG4_AVC:1130dec->msg->body.decode.codec.h264 =1131get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture);1132break;11331134case PIPE_VIDEO_FORMAT_HEVC:1135dec->msg->body.decode.codec.h265 =1136get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture);1137if (dec->ctx.res == NULL) {1138unsigned ctx_size;1139if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)1140ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture);1141else1142ctx_size = calc_ctx_size_h265_main(dec);1143if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {1144RVID_ERR("Can't allocated context buffer.\n");1145}1146si_vid_clear_buffer(decoder->context, &dec->ctx);1147}11481149if (dec->ctx.res)1150dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;1151break;11521153case PIPE_VIDEO_FORMAT_VC1:1154dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture);1155break;11561157case PIPE_VIDEO_FORMAT_MPEG12:1158dec->msg->body.decode.codec.mpeg2 =1159get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture);1160break;11611162case PIPE_VIDEO_FORMAT_MPEG4:1163dec->msg->body.decode.codec.mpeg4 =1164get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture);1165break;11661167case PIPE_VIDEO_FORMAT_JPEG:1168break;11691170default:1171assert(0);1172return;1173}11741175dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;1176dec->msg->body.decode.extension_support = 0x1;11771178/* set at least the feedback buffer size */1179dec->fb[0] = dec->fb_size;11801181send_msg_buf(dec);11821183if (dec->dpb.res)1184send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,1185RADEON_DOMAIN_VRAM);11861187if (dec->ctx.res)1188send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,1189RADEON_DOMAIN_VRAM);1190send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ,1191RADEON_DOMAIN_GTT);1192send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);1193send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, FB_BUFFER_OFFSET,1194RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);1195if (have_it(dec))1196send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,1197FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);1198set_reg(dec, dec->reg.cntl, 1);11991200flush(dec, PIPE_FLUSH_ASYNC);1201next_buffer(dec);1202}12031204/**1205* flush any outstanding command buffers to the hardware1206*/1207static void ruvd_flush(struct pipe_video_codec *decoder)1208{1209}12101211/**1212* create and UVD decoder1213*/1214struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,1215const struct pipe_video_codec *templ,1216ruvd_set_dtb set_dtb)1217{1218struct si_context *sctx = (struct si_context *)context;1219struct radeon_winsys *ws = sctx->ws;1220unsigned dpb_size;1221unsigned width = templ->width, height = templ->height;1222unsigned bs_buf_size;1223struct ruvd_decoder *dec;1224int r, i;12251226switch (u_reduce_video_profile(templ->profile)) {1227case PIPE_VIDEO_FORMAT_MPEG12:1228if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)1229return vl_create_mpeg12_decoder(context, templ);12301231FALLTHROUGH;1232case PIPE_VIDEO_FORMAT_MPEG4:1233width = align(width, VL_MACROBLOCK_WIDTH);1234height = align(height, VL_MACROBLOCK_HEIGHT);1235break;1236case PIPE_VIDEO_FORMAT_MPEG4_AVC:1237width = align(width, VL_MACROBLOCK_WIDTH);1238height = align(height, VL_MACROBLOCK_HEIGHT);1239break;12401241default:1242break;1243}12441245dec = CALLOC_STRUCT(ruvd_decoder);12461247if (!dec)1248return NULL;12491250if (!sctx->screen->info.is_amdgpu)1251dec->use_legacy = true;12521253dec->base = *templ;1254dec->base.context = context;1255dec->base.width = width;1256dec->base.height = height;12571258dec->base.destroy = ruvd_destroy;1259dec->base.begin_frame = ruvd_begin_frame;1260dec->base.decode_macroblock = ruvd_decode_macroblock;1261dec->base.decode_bitstream = ruvd_decode_bitstream;1262dec->base.end_frame = ruvd_end_frame;1263dec->base.flush = ruvd_flush;12641265dec->stream_type = profile2stream_type(dec, sctx->family);1266dec->set_dtb = set_dtb;1267dec->stream_handle = si_vid_alloc_stream_handle();1268dec->screen = context->screen;1269dec->ws = ws;12701271if (!ws->cs_create(&dec->cs, sctx->ctx, RING_UVD, NULL, NULL, false)) {1272RVID_ERR("Can't get command submission context.\n");1273goto error;1274}12751276for (i = 0; i < 16; i++)1277dec->render_pic_list[i] = NULL;1278dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;1279bs_buf_size = width * height * (512 / (16 * 16));1280for (i = 0; i < NUM_BUFFERS; ++i) {1281unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;1282STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);1283if (have_it(dec))1284msg_fb_it_size += IT_SCALING_TABLE_SIZE;1285if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], msg_fb_it_size,1286PIPE_USAGE_STAGING)) {1287RVID_ERR("Can't allocated message buffers.\n");1288goto error;1289}12901291if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size,1292PIPE_USAGE_STAGING)) {1293RVID_ERR("Can't allocated bitstream buffers.\n");1294goto error;1295}12961297si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);1298si_vid_clear_buffer(context, &dec->bs_buffers[i]);1299}13001301dpb_size = calc_dpb_size(dec);1302if (dpb_size) {1303if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {1304RVID_ERR("Can't allocated dpb.\n");1305goto error;1306}1307si_vid_clear_buffer(context, &dec->dpb);1308}13091310if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) {1311unsigned ctx_size = calc_ctx_size_h264_perf(dec);1312if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {1313RVID_ERR("Can't allocated context buffer.\n");1314goto error;1315}1316si_vid_clear_buffer(context, &dec->ctx);1317}13181319if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) {1320if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, UVD_SESSION_CONTEXT_SIZE,1321PIPE_USAGE_DEFAULT)) {1322RVID_ERR("Can't allocated session ctx.\n");1323goto error;1324}1325si_vid_clear_buffer(context, &dec->sessionctx);1326}13271328if (sctx->family >= CHIP_VEGA10) {1329dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;1330dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;1331dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;1332dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15;1333} else {1334dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;1335dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;1336dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;1337dec->reg.cntl = RUVD_ENGINE_CNTL;1338}13391340map_msg_fb_it_buf(dec);1341dec->msg->size = sizeof(*dec->msg);1342dec->msg->msg_type = RUVD_MSG_CREATE;1343dec->msg->stream_handle = dec->stream_handle;1344dec->msg->body.create.stream_type = dec->stream_type;1345dec->msg->body.create.width_in_samples = dec->base.width;1346dec->msg->body.create.height_in_samples = dec->base.height;1347dec->msg->body.create.dpb_size = dpb_size;1348send_msg_buf(dec);1349r = flush(dec, 0);1350if (r)1351goto error;13521353next_buffer(dec);13541355return &dec->base;13561357error:1358dec->ws->cs_destroy(&dec->cs);13591360for (i = 0; i < NUM_BUFFERS; ++i) {1361si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);1362si_vid_destroy_buffer(&dec->bs_buffers[i]);1363}13641365si_vid_destroy_buffer(&dec->dpb);1366si_vid_destroy_buffer(&dec->ctx);1367si_vid_destroy_buffer(&dec->sessionctx);13681369FREE(dec);13701371return NULL;1372}13731374/* calculate top/bottom offset */1375static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,1376enum ruvd_surface_type type)1377{1378switch (type) {1379default:1380case RUVD_SURFACE_TYPE_LEGACY:1381return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +1382layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;1383break;1384case RUVD_SURFACE_TYPE_GFX9:1385return surface->u.gfx9.surf_offset + layer * surface->u.gfx9.surf_slice_size;1386break;1387}1388}13891390/* hw encode the aspect of macro tiles */1391static unsigned macro_tile_aspect(unsigned macro_tile_aspect)1392{1393switch (macro_tile_aspect) {1394default:1395case 1:1396macro_tile_aspect = 0;1397break;1398case 2:1399macro_tile_aspect = 1;1400break;1401case 4:1402macro_tile_aspect = 2;1403break;1404case 8:1405macro_tile_aspect = 3;1406break;1407}1408return macro_tile_aspect;1409}14101411/* hw encode the bank width and height */1412static unsigned bank_wh(unsigned bankwh)1413{1414switch (bankwh) {1415default:1416case 1:1417bankwh = 0;1418break;1419case 2:1420bankwh = 1;1421break;1422case 4:1423bankwh = 2;1424break;1425case 8:1426bankwh = 3;1427break;1428}1429return bankwh;1430}14311432/**1433* fill decoding target field from the luma and chroma surfaces1434*/1435void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,1436struct radeon_surf *chroma, enum ruvd_surface_type type)1437{1438switch (type) {1439default:1440case RUVD_SURFACE_TYPE_LEGACY:1441msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;1442switch (luma->u.legacy.level[0].mode) {1443case RADEON_SURF_MODE_LINEAR_ALIGNED:1444msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;1445msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;1446break;1447case RADEON_SURF_MODE_1D:1448msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;1449msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;1450break;1451case RADEON_SURF_MODE_2D:1452msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;1453msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;1454break;1455default:1456assert(0);1457break;1458}14591460msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);1461if (chroma)1462msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);1463if (msg->body.decode.dt_field_mode) {1464msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);1465if (chroma)1466msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);1467} else {1468msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;1469msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;1470}14711472if (chroma) {1473assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);1474assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);1475assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);1476}14771478msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));1479msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));1480msg->body.decode.dt_surf_tile_config |=1481RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));1482break;1483case RUVD_SURFACE_TYPE_GFX9:1484msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w;1485/* SWIZZLE LINEAR MODE */1486msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;1487msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;1488msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);1489msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);1490if (msg->body.decode.dt_field_mode) {1491msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);1492msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);1493} else {1494msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;1495msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;1496}1497msg->body.decode.dt_surf_tile_config = 0;1498break;1499}1500}150115021503