Path: blob/21.2-virgl/src/gallium/drivers/r600/radeon_uvd.c
4570 views
/**************************************************************************1*2* Copyright 2011 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627/*28* Authors:29* Christian König <[email protected]>30*31*/3233#include <sys/types.h>34#include <assert.h>35#include <errno.h>36#include <unistd.h>37#include <stdio.h>3839#include "pipe/p_video_codec.h"4041#include "util/u_memory.h"42#include "util/u_video.h"4344#include "vl/vl_defines.h"45#include "vl/vl_mpeg12_decoder.h"4647#include "r600_pipe_common.h"48#include "radeon_video.h"49#include "radeon_uvd.h"5051#define NUM_BUFFERS 45253#define NUM_MPEG2_REFS 654#define NUM_H264_REFS 1755#define NUM_VC1_REFS 55657#define FB_BUFFER_OFFSET 0x100058#define FB_BUFFER_SIZE 204859#define FB_BUFFER_SIZE_TONGA (2048 * 64)60#define IT_SCALING_TABLE_SIZE 99261#define UVD_SESSION_CONTEXT_SIZE (128 * 1024)6263/* UVD decoder representation */64struct ruvd_decoder {65struct pipe_video_codec base;6667ruvd_set_dtb set_dtb;6869unsigned stream_handle;70unsigned stream_type;71unsigned frame_number;7273struct pipe_screen *screen;74struct radeon_winsys* ws;75struct radeon_cmdbuf cs;7677unsigned cur_buffer;7879struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];80struct ruvd_msg *msg;81uint32_t *fb;82unsigned fb_size;83uint8_t *it;8485struct rvid_buffer bs_buffers[NUM_BUFFERS];86void* bs_ptr;87unsigned bs_size;8889struct rvid_buffer dpb;90bool use_legacy;91struct rvid_buffer ctx;92struct rvid_buffer sessionctx;93struct {94unsigned data0;95unsigned data1;96unsigned cmd;97unsigned cntl;98} reg;99};100101/* flush IB to the hardware */102static int flush(struct ruvd_decoder *dec, unsigned flags)103{104return dec->ws->cs_flush(&dec->cs, flags, NULL);105}106107/* add a new set register command to the IB */108static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)109{110radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0));111radeon_emit(&dec->cs, val);112}113114/* send a command to the VCPU through the GPCOM registers */115static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,116struct pb_buffer* buf, uint32_t off,117enum radeon_bo_usage usage, enum radeon_bo_domain domain)118{119int reloc_idx;120121reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,122domain, 0);123if (!dec->use_legacy) {124uint64_t addr;125addr = dec->ws->buffer_get_virtual_address(buf);126addr = addr + off;127set_reg(dec, dec->reg.data0, addr);128set_reg(dec, dec->reg.data1, addr >> 32);129} else {130off += dec->ws->buffer_get_reloc_offset(buf);131set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);132set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);133}134set_reg(dec, dec->reg.cmd, cmd << 1);135}136137/* do the codec needs an IT buffer ?*/138static bool have_it(struct ruvd_decoder *dec)139{140return dec->stream_type == RUVD_CODEC_H264_PERF ||141dec->stream_type == RUVD_CODEC_H265;142}143144/* map the next available message/feedback/itscaling buffer */145static void map_msg_fb_it_buf(struct ruvd_decoder *dec)146{147struct rvid_buffer* buf;148uint8_t *ptr;149150/* grab the current message/feedback buffer */151buf = &dec->msg_fb_it_buffers[dec->cur_buffer];152153/* and map it for CPU access */154ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,155PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);156157/* calc buffer offsets */158dec->msg = (struct ruvd_msg *)ptr;159memset(dec->msg, 0, sizeof(*dec->msg));160161dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);162if (have_it(dec))163dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);164}165166/* unmap and send a message command to the VCPU */167static void send_msg_buf(struct ruvd_decoder *dec)168{169struct rvid_buffer* buf;170171/* ignore the request if message/feedback buffer isn't mapped */172if (!dec->msg || !dec->fb)173return;174175/* grab the current message buffer */176buf = &dec->msg_fb_it_buffers[dec->cur_buffer];177178/* unmap the buffer */179dec->ws->buffer_unmap(dec->ws, buf->res->buf);180dec->bs_ptr = NULL;181dec->msg = NULL;182dec->fb = NULL;183dec->it = NULL;184185186if (dec->sessionctx.res)187send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER,188dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE,189RADEON_DOMAIN_VRAM);190191/* and send it to the hardware */192send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0,193RADEON_USAGE_READ, RADEON_DOMAIN_GTT);194}195196/* cycle to the next set of buffers */197static void next_buffer(struct ruvd_decoder *dec)198{199++dec->cur_buffer;200dec->cur_buffer %= NUM_BUFFERS;201}202203/* convert the profile into something UVD understands */204static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)205{206switch (u_reduce_video_profile(dec->base.profile)) {207case PIPE_VIDEO_FORMAT_MPEG4_AVC:208return RUVD_CODEC_H264;209210case PIPE_VIDEO_FORMAT_VC1:211return RUVD_CODEC_VC1;212213case PIPE_VIDEO_FORMAT_MPEG12:214return RUVD_CODEC_MPEG2;215216case PIPE_VIDEO_FORMAT_MPEG4:217return RUVD_CODEC_MPEG4;218219case PIPE_VIDEO_FORMAT_JPEG:220return RUVD_CODEC_MJPEG;221222default:223assert(0);224return 0;225}226}227228229static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)230{231return 16;232}233234/* calculate size of reference picture buffer */235static unsigned calc_dpb_size(struct ruvd_decoder *dec)236{237unsigned width_in_mb, height_in_mb, image_size, dpb_size;238239// always align them to MB size for dpb calculation240unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);241unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);242243// always one more for currently decoded picture244unsigned max_references = dec->base.max_references + 1;245246// aligned size of a single frame247image_size = align(width, get_db_pitch_alignment(dec)) * height;248image_size += image_size / 2;249image_size = align(image_size, 1024);250251// picture width & height in 16 pixel units252width_in_mb = width / VL_MACROBLOCK_WIDTH;253height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);254255switch (u_reduce_video_profile(dec->base.profile)) {256case PIPE_VIDEO_FORMAT_MPEG4_AVC: {257if (!dec->use_legacy) {258unsigned fs_in_mb = width_in_mb * height_in_mb;259unsigned alignment = 64, num_dpb_buffer;260261if (dec->stream_type == RUVD_CODEC_H264_PERF)262alignment = 256;263switch(dec->base.level) {264case 30:265num_dpb_buffer = 8100 / fs_in_mb;266break;267case 31:268num_dpb_buffer = 18000 / fs_in_mb;269break;270case 32:271num_dpb_buffer = 20480 / fs_in_mb;272break;273case 41:274num_dpb_buffer = 32768 / fs_in_mb;275break;276case 42:277num_dpb_buffer = 34816 / fs_in_mb;278break;279case 50:280num_dpb_buffer = 110400 / fs_in_mb;281break;282case 51:283num_dpb_buffer = 184320 / fs_in_mb;284break;285default:286num_dpb_buffer = 184320 / fs_in_mb;287break;288}289num_dpb_buffer++;290max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);291dpb_size = image_size * max_references;292if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {293dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);294dpb_size += align(width_in_mb * height_in_mb * 32, alignment);295}296} else {297// the firmware seems to allways assume a minimum of ref frames298max_references = MAX2(NUM_H264_REFS, max_references);299// reference picture buffer300dpb_size = image_size * max_references;301if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {302// macroblock context buffer303dpb_size += width_in_mb * height_in_mb * max_references * 192;304// IT surface buffer305dpb_size += width_in_mb * height_in_mb * 32;306}307}308break;309}310311case PIPE_VIDEO_FORMAT_VC1:312// the firmware seems to allways assume a minimum of ref frames313max_references = MAX2(NUM_VC1_REFS, max_references);314315// reference picture buffer316dpb_size = image_size * max_references;317318// CONTEXT_BUFFER319dpb_size += width_in_mb * height_in_mb * 128;320321// IT surface buffer322dpb_size += width_in_mb * 64;323324// DB surface buffer325dpb_size += width_in_mb * 128;326327// BP328dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);329break;330331case PIPE_VIDEO_FORMAT_MPEG12:332// reference picture buffer, must be big enough for all frames333dpb_size = image_size * NUM_MPEG2_REFS;334break;335336case PIPE_VIDEO_FORMAT_MPEG4:337// reference picture buffer338dpb_size = image_size * max_references;339340// CM341dpb_size += width_in_mb * height_in_mb * 64;342343// IT surface buffer344dpb_size += align(width_in_mb * height_in_mb * 32, 64);345346dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);347break;348349case PIPE_VIDEO_FORMAT_JPEG:350dpb_size = 0;351break;352353default:354// something is missing here355assert(0);356357// at least use a sane default value358dpb_size = 32 * 1024 * 1024;359break;360}361return dpb_size;362}363364/* free associated data in the video buffer callback */365static void ruvd_destroy_associated_data(void *data)366{367/* NOOP, since we only use an intptr */368}369370/* get h264 specific message bits */371static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)372{373struct ruvd_h264 result;374375memset(&result, 0, sizeof(result));376switch (pic->base.profile) {377case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:378case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:379result.profile = RUVD_H264_PROFILE_BASELINE;380break;381382case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:383result.profile = RUVD_H264_PROFILE_MAIN;384break;385386case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:387result.profile = RUVD_H264_PROFILE_HIGH;388break;389390default:391assert(0);392break;393}394395result.level = dec->base.level;396397result.sps_info_flags = 0;398result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;399result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;400result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;401result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;402403result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;404result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;405result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;406result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;407result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;408409switch (dec->base.chroma_format) {410case PIPE_VIDEO_CHROMA_FORMAT_NONE:411/* TODO: assert? */412break;413case PIPE_VIDEO_CHROMA_FORMAT_400:414result.chroma_format = 0;415break;416case PIPE_VIDEO_CHROMA_FORMAT_420:417result.chroma_format = 1;418break;419case PIPE_VIDEO_CHROMA_FORMAT_422:420result.chroma_format = 2;421break;422case PIPE_VIDEO_CHROMA_FORMAT_444:423result.chroma_format = 3;424break;425}426427result.pps_info_flags = 0;428result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;429result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;430result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;431result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;432result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;433result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;434result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;435result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;436437result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;438result.slice_group_map_type = pic->pps->slice_group_map_type;439result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;440result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;441result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;442result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;443444memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);445memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);446447if (dec->stream_type == RUVD_CODEC_H264_PERF) {448memcpy(dec->it, result.scaling_list_4x4, 6*16);449memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);450}451452result.num_ref_frames = pic->num_ref_frames;453454result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;455result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;456457result.frame_num = pic->frame_num;458memcpy(result.frame_num_list, pic->frame_num_list, 4*16);459result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];460result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];461memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);462463result.decoded_pic_idx = pic->frame_num;464465return result;466}467468/* get vc1 specific message bits */469static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)470{471struct ruvd_vc1 result;472473memset(&result, 0, sizeof(result));474475switch(pic->base.profile) {476case PIPE_VIDEO_PROFILE_VC1_SIMPLE:477result.profile = RUVD_VC1_PROFILE_SIMPLE;478result.level = 1;479break;480481case PIPE_VIDEO_PROFILE_VC1_MAIN:482result.profile = RUVD_VC1_PROFILE_MAIN;483result.level = 2;484break;485486case PIPE_VIDEO_PROFILE_VC1_ADVANCED:487result.profile = RUVD_VC1_PROFILE_ADVANCED;488result.level = 4;489break;490491default:492assert(0);493}494495/* fields common for all profiles */496result.sps_info_flags |= pic->postprocflag << 7;497result.sps_info_flags |= pic->pulldown << 6;498result.sps_info_flags |= pic->interlace << 5;499result.sps_info_flags |= pic->tfcntrflag << 4;500result.sps_info_flags |= pic->finterpflag << 3;501result.sps_info_flags |= pic->psf << 1;502503result.pps_info_flags |= pic->range_mapy_flag << 31;504result.pps_info_flags |= pic->range_mapy << 28;505result.pps_info_flags |= pic->range_mapuv_flag << 27;506result.pps_info_flags |= pic->range_mapuv << 24;507result.pps_info_flags |= pic->multires << 21;508result.pps_info_flags |= pic->maxbframes << 16;509result.pps_info_flags |= pic->overlap << 11;510result.pps_info_flags |= pic->quantizer << 9;511result.pps_info_flags |= pic->panscan_flag << 7;512result.pps_info_flags |= pic->refdist_flag << 6;513result.pps_info_flags |= pic->vstransform << 0;514515/* some fields only apply to main/advanced profile */516if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {517result.pps_info_flags |= pic->syncmarker << 20;518result.pps_info_flags |= pic->rangered << 19;519result.pps_info_flags |= pic->loopfilter << 5;520result.pps_info_flags |= pic->fastuvmc << 4;521result.pps_info_flags |= pic->extended_mv << 3;522result.pps_info_flags |= pic->extended_dmv << 8;523result.pps_info_flags |= pic->dquant << 1;524}525526result.chroma_format = 1;527528#if 0529//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)530uint32_t slice_count531uint8_t picture_type532uint8_t frame_coding_mode533uint8_t deblockEnable534uint8_t pquant535#endif536537return result;538}539540/* extract the frame number from a referenced video buffer */541static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)542{543uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;544uint32_t max = MAX2(dec->frame_number, 1) - 1;545uintptr_t frame;546547/* seems to be the most sane fallback */548if (!ref)549return max;550551/* get the frame number from the associated data */552frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);553554/* limit the frame number to a valid range */555return MAX2(MIN2(frame, max), min);556}557558/* get mpeg2 specific msg bits */559static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,560struct pipe_mpeg12_picture_desc *pic)561{562const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;563struct ruvd_mpeg2 result;564unsigned i;565566memset(&result, 0, sizeof(result));567result.decoded_pic_idx = dec->frame_number;568for (i = 0; i < 2; ++i)569result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);570571result.load_intra_quantiser_matrix = 1;572result.load_nonintra_quantiser_matrix = 1;573574for (i = 0; i < 64; ++i) {575result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];576result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];577}578579result.profile_and_level_indication = 0;580result.chroma_format = 0x1;581582result.picture_coding_type = pic->picture_coding_type;583result.f_code[0][0] = pic->f_code[0][0] + 1;584result.f_code[0][1] = pic->f_code[0][1] + 1;585result.f_code[1][0] = pic->f_code[1][0] + 1;586result.f_code[1][1] = pic->f_code[1][1] + 1;587result.intra_dc_precision = pic->intra_dc_precision;588result.pic_structure = pic->picture_structure;589result.top_field_first = pic->top_field_first;590result.frame_pred_frame_dct = pic->frame_pred_frame_dct;591result.concealment_motion_vectors = pic->concealment_motion_vectors;592result.q_scale_type = pic->q_scale_type;593result.intra_vlc_format = pic->intra_vlc_format;594result.alternate_scan = pic->alternate_scan;595596return result;597}598599/* get mpeg4 specific msg bits */600static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,601struct pipe_mpeg4_picture_desc *pic)602{603struct ruvd_mpeg4 result;604unsigned i;605606memset(&result, 0, sizeof(result));607result.decoded_pic_idx = dec->frame_number;608for (i = 0; i < 2; ++i)609result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);610611result.variant_type = 0;612result.profile_and_level_indication = 0xF0; // ASP Level0613614result.video_object_layer_verid = 0x5; // advanced simple615result.video_object_layer_shape = 0x0; // rectangular616617result.video_object_layer_width = dec->base.width;618result.video_object_layer_height = dec->base.height;619620result.vop_time_increment_resolution = pic->vop_time_increment_resolution;621622result.flags |= pic->short_video_header << 0;623//result.flags |= obmc_disable << 1;624result.flags |= pic->interlaced << 2;625result.flags |= 1 << 3; // load_intra_quant_mat626result.flags |= 1 << 4; // load_nonintra_quant_mat627result.flags |= pic->quarter_sample << 5;628result.flags |= 1 << 6; // complexity_estimation_disable629result.flags |= pic->resync_marker_disable << 7;630//result.flags |= data_partitioned << 8;631//result.flags |= reversible_vlc << 9;632result.flags |= 0 << 10; // newpred_enable633result.flags |= 0 << 11; // reduced_resolution_vop_enable634//result.flags |= scalability << 12;635//result.flags |= is_object_layer_identifier << 13;636//result.flags |= fixed_vop_rate << 14;637//result.flags |= newpred_segment_type << 15;638639result.quant_type = pic->quant_type;640641for (i = 0; i < 64; ++i) {642result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];643result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];644}645646/*647int32_t trd [2]648int32_t trb [2]649uint8_t vop_coding_type650uint8_t vop_fcode_forward651uint8_t vop_fcode_backward652uint8_t rounding_control653uint8_t alternate_vertical_scan_flag654uint8_t top_field_first655*/656657return result;658}659660static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)661{662int size = 0, saved_size, len_pos, i;663uint16_t *bs;664uint8_t *buf = dec->bs_ptr;665666/* SOI */667buf[size++] = 0xff;668buf[size++] = 0xd8;669670/* DQT */671buf[size++] = 0xff;672buf[size++] = 0xdb;673674len_pos = size++;675size++;676677for (i = 0; i < 4; ++i) {678if (pic->quantization_table.load_quantiser_table[i] == 0)679continue;680681buf[size++] = i;682memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);683size += 64;684}685686bs = (uint16_t*)&buf[len_pos];687*bs = util_bswap16(size - 4);688689saved_size = size;690691/* DHT */692buf[size++] = 0xff;693buf[size++] = 0xc4;694695len_pos = size++;696size++;697698for (i = 0; i < 2; ++i) {699if (pic->huffman_table.load_huffman_table[i] == 0)700continue;701702buf[size++] = 0x00 | i;703memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);704size += 16;705memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);706size += 12;707}708709for (i = 0; i < 2; ++i) {710if (pic->huffman_table.load_huffman_table[i] == 0)711continue;712713buf[size++] = 0x10 | i;714memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);715size += 16;716memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);717size += 162;718}719720bs = (uint16_t*)&buf[len_pos];721*bs = util_bswap16(size - saved_size - 2);722723saved_size = size;724725/* DRI */726if (pic->slice_parameter.restart_interval) {727buf[size++] = 0xff;728buf[size++] = 0xdd;729buf[size++] = 0x00;730buf[size++] = 0x04;731bs = (uint16_t*)&buf[size++];732*bs = util_bswap16(pic->slice_parameter.restart_interval);733saved_size = ++size;734}735736/* SOF */737buf[size++] = 0xff;738buf[size++] = 0xc0;739740len_pos = size++;741size++;742743buf[size++] = 0x08;744745bs = (uint16_t*)&buf[size++];746*bs = util_bswap16(pic->picture_parameter.picture_height);747size++;748749bs = (uint16_t*)&buf[size++];750*bs = util_bswap16(pic->picture_parameter.picture_width);751size++;752753buf[size++] = pic->picture_parameter.num_components;754755for (i = 0; i < pic->picture_parameter.num_components; ++i) {756buf[size++] = pic->picture_parameter.components[i].component_id;757buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |758pic->picture_parameter.components[i].v_sampling_factor;759buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;760}761762bs = (uint16_t*)&buf[len_pos];763*bs = util_bswap16(size - saved_size - 2);764765saved_size = size;766767/* SOS */768buf[size++] = 0xff;769buf[size++] = 0xda;770771len_pos = size++;772size++;773774buf[size++] = pic->slice_parameter.num_components;775776for (i = 0; i < pic->slice_parameter.num_components; ++i) {777buf[size++] = pic->slice_parameter.components[i].component_selector;778buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |779pic->slice_parameter.components[i].ac_table_selector;780}781782buf[size++] = 0x00;783buf[size++] = 0x3f;784buf[size++] = 0x00;785786bs = (uint16_t*)&buf[len_pos];787*bs = util_bswap16(size - saved_size - 2);788789dec->bs_ptr += size;790dec->bs_size += size;791}792793/**794* destroy this video decoder795*/796static void ruvd_destroy(struct pipe_video_codec *decoder)797{798struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;799unsigned i;800801assert(decoder);802803map_msg_fb_it_buf(dec);804dec->msg->size = sizeof(*dec->msg);805dec->msg->msg_type = RUVD_MSG_DESTROY;806dec->msg->stream_handle = dec->stream_handle;807send_msg_buf(dec);808809flush(dec, 0);810811dec->ws->cs_destroy(&dec->cs);812813for (i = 0; i < NUM_BUFFERS; ++i) {814rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);815rvid_destroy_buffer(&dec->bs_buffers[i]);816}817818rvid_destroy_buffer(&dec->dpb);819rvid_destroy_buffer(&dec->ctx);820rvid_destroy_buffer(&dec->sessionctx);821822FREE(dec);823}824825/**826* start decoding of a new frame827*/828static void ruvd_begin_frame(struct pipe_video_codec *decoder,829struct pipe_video_buffer *target,830struct pipe_picture_desc *picture)831{832struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;833uintptr_t frame;834835assert(decoder);836837frame = ++dec->frame_number;838vl_video_buffer_set_associated_data(target, decoder, (void *)frame,839&ruvd_destroy_associated_data);840841dec->bs_size = 0;842dec->bs_ptr = dec->ws->buffer_map(dec->ws,843dec->bs_buffers[dec->cur_buffer].res->buf,844&dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);845}846847/**848* decode a macroblock849*/850static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,851struct pipe_video_buffer *target,852struct pipe_picture_desc *picture,853const struct pipe_macroblock *macroblocks,854unsigned num_macroblocks)855{856/* not supported (yet) */857assert(0);858}859860/**861* decode a bitstream862*/863static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,864struct pipe_video_buffer *target,865struct pipe_picture_desc *picture,866unsigned num_buffers,867const void * const *buffers,868const unsigned *sizes)869{870struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;871enum pipe_video_format format = u_reduce_video_profile(picture->profile);872unsigned i;873874assert(decoder);875876if (!dec->bs_ptr)877return;878879if (format == PIPE_VIDEO_FORMAT_JPEG)880get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);881882for (i = 0; i < num_buffers; ++i) {883struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];884unsigned new_size = dec->bs_size + sizes[i];885886if (format == PIPE_VIDEO_FORMAT_JPEG)887new_size += 2; /* save for EOI */888889if (new_size > buf->res->buf->size) {890dec->ws->buffer_unmap(dec->ws, buf->res->buf);891dec->bs_ptr = NULL;892if (!rvid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) {893RVID_ERR("Can't resize bitstream buffer!");894return;895}896897dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,898PIPE_MAP_WRITE |899RADEON_MAP_TEMPORARY);900if (!dec->bs_ptr)901return;902903dec->bs_ptr += dec->bs_size;904}905906memcpy(dec->bs_ptr, buffers[i], sizes[i]);907dec->bs_size += sizes[i];908dec->bs_ptr += sizes[i];909}910911if (format == PIPE_VIDEO_FORMAT_JPEG) {912((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */913((uint8_t *)dec->bs_ptr)[1] = 0xd9;914dec->bs_size += 2;915dec->bs_ptr += 2;916}917}918919/**920* end decoding of the current frame921*/922static void ruvd_end_frame(struct pipe_video_codec *decoder,923struct pipe_video_buffer *target,924struct pipe_picture_desc *picture)925{926struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;927struct pb_buffer *dt;928struct rvid_buffer *msg_fb_it_buf, *bs_buf;929unsigned bs_size;930931assert(decoder);932933if (!dec->bs_ptr)934return;935936msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];937bs_buf = &dec->bs_buffers[dec->cur_buffer];938939bs_size = align(dec->bs_size, 128);940memset(dec->bs_ptr, 0, bs_size - dec->bs_size);941dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);942dec->bs_ptr = NULL;943944map_msg_fb_it_buf(dec);945dec->msg->size = sizeof(*dec->msg);946dec->msg->msg_type = RUVD_MSG_DECODE;947dec->msg->stream_handle = dec->stream_handle;948dec->msg->status_report_feedback_number = dec->frame_number;949950dec->msg->body.decode.stream_type = dec->stream_type;951dec->msg->body.decode.decode_flags = 0x1;952dec->msg->body.decode.width_in_samples = dec->base.width;953dec->msg->body.decode.height_in_samples = dec->base.height;954955if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||956(picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {957dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;958dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;959}960961if (dec->dpb.res)962dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;963dec->msg->body.decode.bsd_size = bs_size;964dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));965966dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);967968switch (u_reduce_video_profile(picture->profile)) {969case PIPE_VIDEO_FORMAT_MPEG4_AVC:970dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);971break;972973case PIPE_VIDEO_FORMAT_VC1:974dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);975break;976977case PIPE_VIDEO_FORMAT_MPEG12:978dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);979break;980981case PIPE_VIDEO_FORMAT_MPEG4:982dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);983break;984985case PIPE_VIDEO_FORMAT_JPEG:986break;987988default:989assert(0);990return;991}992993dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;994dec->msg->body.decode.extension_support = 0x1;995996/* set at least the feedback buffer size */997dec->fb[0] = dec->fb_size;998999send_msg_buf(dec);10001001if (dec->dpb.res)1002send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,1003RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);10041005if (dec->ctx.res)1006send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,1007RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);1008send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,10090, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);1010send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,1011RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);1012send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,1013FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);1014if (have_it(dec))1015send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,1016FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);1017set_reg(dec, dec->reg.cntl, 1);10181019flush(dec, PIPE_FLUSH_ASYNC);1020next_buffer(dec);1021}10221023/**1024* flush any outstanding command buffers to the hardware1025*/1026static void ruvd_flush(struct pipe_video_codec *decoder)1027{1028}10291030/**1031* create and UVD decoder1032*/1033struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,1034const struct pipe_video_codec *templ,1035ruvd_set_dtb set_dtb)1036{1037struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;1038struct r600_common_context *rctx = (struct r600_common_context*)context;1039unsigned dpb_size;1040unsigned width = templ->width, height = templ->height;1041unsigned bs_buf_size;1042struct radeon_info info;1043struct ruvd_decoder *dec;1044int r, i;10451046ws->query_info(ws, &info, false, false);10471048switch(u_reduce_video_profile(templ->profile)) {1049case PIPE_VIDEO_FORMAT_MPEG12:1050if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)1051return vl_create_mpeg12_decoder(context, templ);10521053FALLTHROUGH;1054case PIPE_VIDEO_FORMAT_MPEG4:1055width = align(width, VL_MACROBLOCK_WIDTH);1056height = align(height, VL_MACROBLOCK_HEIGHT);1057break;1058case PIPE_VIDEO_FORMAT_MPEG4_AVC:1059width = align(width, VL_MACROBLOCK_WIDTH);1060height = align(height, VL_MACROBLOCK_HEIGHT);1061break;10621063default:1064break;1065}106610671068dec = CALLOC_STRUCT(ruvd_decoder);10691070if (!dec)1071return NULL;10721073dec->use_legacy = true;10741075dec->base = *templ;1076dec->base.context = context;1077dec->base.width = width;1078dec->base.height = height;10791080dec->base.destroy = ruvd_destroy;1081dec->base.begin_frame = ruvd_begin_frame;1082dec->base.decode_macroblock = ruvd_decode_macroblock;1083dec->base.decode_bitstream = ruvd_decode_bitstream;1084dec->base.end_frame = ruvd_end_frame;1085dec->base.flush = ruvd_flush;10861087dec->stream_type = profile2stream_type(dec, info.family);1088dec->set_dtb = set_dtb;1089dec->stream_handle = rvid_alloc_stream_handle();1090dec->screen = context->screen;1091dec->ws = ws;10921093if (!ws->cs_create(&dec->cs, rctx->ctx, RING_UVD, NULL, NULL, false)) {1094RVID_ERR("Can't get command submission context.\n");1095goto error;1096}10971098dec->fb_size = FB_BUFFER_SIZE;1099bs_buf_size = width * height * (512 / (16 * 16));1100for (i = 0; i < NUM_BUFFERS; ++i) {1101unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;1102STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);1103if (have_it(dec))1104msg_fb_it_size += IT_SCALING_TABLE_SIZE;1105if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],1106msg_fb_it_size, PIPE_USAGE_STAGING)) {1107RVID_ERR("Can't allocated message buffers.\n");1108goto error;1109}11101111if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],1112bs_buf_size, PIPE_USAGE_STAGING)) {1113RVID_ERR("Can't allocated bitstream buffers.\n");1114goto error;1115}11161117rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);1118rvid_clear_buffer(context, &dec->bs_buffers[i]);1119}11201121dpb_size = calc_dpb_size(dec);1122if (dpb_size) {1123if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {1124RVID_ERR("Can't allocated dpb.\n");1125goto error;1126}1127rvid_clear_buffer(context, &dec->dpb);1128}11291130dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;1131dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;1132dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;1133dec->reg.cntl = RUVD_ENGINE_CNTL;11341135map_msg_fb_it_buf(dec);1136dec->msg->size = sizeof(*dec->msg);1137dec->msg->msg_type = RUVD_MSG_CREATE;1138dec->msg->stream_handle = dec->stream_handle;1139dec->msg->body.create.stream_type = dec->stream_type;1140dec->msg->body.create.width_in_samples = dec->base.width;1141dec->msg->body.create.height_in_samples = dec->base.height;1142dec->msg->body.create.dpb_size = dpb_size;1143send_msg_buf(dec);1144r = flush(dec, 0);1145if (r)1146goto error;11471148next_buffer(dec);11491150return &dec->base;11511152error:1153dec->ws->cs_destroy(&dec->cs);11541155for (i = 0; i < NUM_BUFFERS; ++i) {1156rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);1157rvid_destroy_buffer(&dec->bs_buffers[i]);1158}11591160rvid_destroy_buffer(&dec->dpb);1161rvid_destroy_buffer(&dec->ctx);1162rvid_destroy_buffer(&dec->sessionctx);11631164FREE(dec);11651166return NULL;1167}11681169/* calculate top/bottom offset */1170static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)1171{1172return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +1173layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;1174}11751176/* hw encode the aspect of macro tiles */1177static unsigned macro_tile_aspect(unsigned macro_tile_aspect)1178{1179switch (macro_tile_aspect) {1180default:1181case 1: macro_tile_aspect = 0; break;1182case 2: macro_tile_aspect = 1; break;1183case 4: macro_tile_aspect = 2; break;1184case 8: macro_tile_aspect = 3; break;1185}1186return macro_tile_aspect;1187}11881189/* hw encode the bank width and height */1190static unsigned bank_wh(unsigned bankwh)1191{1192switch (bankwh) {1193default:1194case 1: bankwh = 0; break;1195case 2: bankwh = 1; break;1196case 4: bankwh = 2; break;1197case 8: bankwh = 3; break;1198}1199return bankwh;1200}12011202/**1203* fill decoding target field from the luma and chroma surfaces1204*/1205void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,1206struct radeon_surf *chroma)1207{1208msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;1209switch (luma->u.legacy.level[0].mode) {1210case RADEON_SURF_MODE_LINEAR_ALIGNED:1211msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;1212msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;1213break;1214case RADEON_SURF_MODE_1D:1215msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;1216msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;1217break;1218case RADEON_SURF_MODE_2D:1219msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;1220msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;1221break;1222default:1223assert(0);1224break;1225}12261227msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);1228if (chroma)1229msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);1230if (msg->body.decode.dt_field_mode) {1231msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);1232if (chroma)1233msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);1234} else {1235msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;1236msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;1237}12381239if (chroma) {1240assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);1241assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);1242assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);1243}12441245msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));1246msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));1247msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));1248}124912501251