Path: blob/21.2-virgl/src/gallium/drivers/radeon/radeon_vce.c
4570 views
/**************************************************************************1*2* Copyright 2013 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "radeon_vce.h"2829#include "pipe/p_video_codec.h"30#include "radeon_video.h"31#include "radeonsi/si_pipe.h"32#include "util/u_memory.h"33#include "util/u_video.h"34#include "vl/vl_video_buffer.h"3536#include <stdio.h>3738#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))39#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))40#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))41#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))42#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))43#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))44#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))45#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))46#define FW_53 (53 << 24)4748/**49* flush commands to the hardware50*/51static void flush(struct rvce_encoder *enc)52{53enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL);54enc->task_info_idx = 0;55enc->bs_idx = 0;56}5758#if 059static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)60{61uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE);62unsigned i = 0;63fprintf(stderr, "\n");64fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);65fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);66fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);67fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);68fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);69fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);70fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);71fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);72fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);73fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);74fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);75fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);76fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);77fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);78fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);79fprintf(stderr, "\n");80enc->ws->buffer_unmap(fb->res->buf);81}82#endif8384/**85* reset the CPB handling86*/87static void reset_cpb(struct rvce_encoder *enc)88{89unsigned i;9091list_inithead(&enc->cpb_slots);92for (i = 0; i < enc->cpb_num; ++i) {93struct rvce_cpb_slot *slot = &enc->cpb_array[i];94slot->index = i;95slot->picture_type = PIPE_H2645_ENC_PICTURE_TYPE_SKIP;96slot->frame_num = 0;97slot->pic_order_cnt = 0;98list_addtail(&slot->list, &enc->cpb_slots);99}100}101102/**103* sort l0 and l1 to the top of the list104*/105static void sort_cpb(struct rvce_encoder *enc)106{107struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;108109LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) {110if (i->frame_num == enc->pic.ref_idx_l0)111l0 = i;112113if (i->frame_num == enc->pic.ref_idx_l1)114l1 = i;115116if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P && l0)117break;118119if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B && l0 && l1)120break;121}122123if (l1) {124list_del(&l1->list);125list_add(&l1->list, &enc->cpb_slots);126}127128if (l0) {129list_del(&l0->list);130list_add(&l0->list, &enc->cpb_slots);131}132}133134/**135* get number of cpbs based on dpb136*/137static unsigned get_cpb_num(struct rvce_encoder *enc)138{139unsigned w = align(enc->base.width, 16) / 16;140unsigned h = align(enc->base.height, 16) / 16;141unsigned dpb;142143switch (enc->base.level) {144case 10:145dpb = 396;146break;147case 11:148dpb = 900;149break;150case 12:151case 13:152case 20:153dpb = 2376;154break;155case 21:156dpb = 4752;157break;158case 22:159case 30:160dpb = 8100;161break;162case 31:163dpb = 18000;164break;165case 32:166dpb = 20480;167break;168case 40:169case 41:170dpb = 32768;171break;172case 42:173dpb = 34816;174break;175case 50:176dpb = 110400;177break;178default:179case 51:180case 52:181dpb = 184320;182break;183}184185return MIN2(dpb / (w * h), 16);186}187188/**189* Get the slot for the currently encoded frame190*/191struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)192{193return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);194}195196/**197* Get the slot for L0198*/199struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)200{201return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);202}203204/**205* Get the slot for L1206*/207struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)208{209return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);210}211212/**213* Calculate the offsets into the CPB214*/215void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset,216signed *chroma_offset)217{218struct si_screen *sscreen = (struct si_screen *)enc->screen;219unsigned pitch, vpitch, fsize;220221if (sscreen->info.chip_class < GFX9) {222pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);223vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);224} else {225pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);226vpitch = align(enc->luma->u.gfx9.surf_height, 16);227}228fsize = pitch * (vpitch + vpitch / 2);229230*luma_offset = slot->index * fsize;231*chroma_offset = *luma_offset + pitch * vpitch;232}233234/**235* destroy this video encoder236*/237static void rvce_destroy(struct pipe_video_codec *encoder)238{239struct rvce_encoder *enc = (struct rvce_encoder *)encoder;240if (enc->stream_handle) {241struct rvid_buffer fb;242si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);243enc->fb = &fb;244enc->session(enc);245enc->destroy(enc);246flush(enc);247si_vid_destroy_buffer(&fb);248}249si_vid_destroy_buffer(&enc->cpb);250enc->ws->cs_destroy(&enc->cs);251FREE(enc->cpb_array);252FREE(enc);253}254255static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,256struct pipe_picture_desc *picture)257{258struct rvce_encoder *enc = (struct rvce_encoder *)encoder;259struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;260struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;261262bool need_rate_control =263enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||264enc->pic.quant_i_frames != pic->quant_i_frames ||265enc->pic.quant_p_frames != pic->quant_p_frames ||266enc->pic.quant_b_frames != pic->quant_b_frames ||267enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate ||268enc->pic.rate_ctrl.frame_rate_num != pic->rate_ctrl.frame_rate_num ||269enc->pic.rate_ctrl.frame_rate_den != pic->rate_ctrl.frame_rate_den;270271enc->pic = *pic;272enc->si_get_pic_param(enc, pic);273274enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);275enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);276277if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR)278reset_cpb(enc);279else if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P ||280pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B)281sort_cpb(enc);282283if (!enc->stream_handle) {284struct rvid_buffer fb;285enc->stream_handle = si_vid_alloc_stream_handle();286si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);287enc->fb = &fb;288enc->session(enc);289enc->create(enc);290enc->config(enc);291enc->feedback(enc);292flush(enc);293// dump_feedback(enc, &fb);294si_vid_destroy_buffer(&fb);295need_rate_control = false;296}297298if (need_rate_control) {299enc->session(enc);300enc->config(enc);301flush(enc);302}303}304305static void rvce_encode_bitstream(struct pipe_video_codec *encoder,306struct pipe_video_buffer *source,307struct pipe_resource *destination, void **fb)308{309struct rvce_encoder *enc = (struct rvce_encoder *)encoder;310enc->get_buffer(destination, &enc->bs_handle, NULL);311enc->bs_size = destination->width0;312313*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);314if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {315RVID_ERR("Can't create feedback buffer.\n");316return;317}318if (!radeon_emitted(&enc->cs, 0))319enc->session(enc);320enc->encode(enc);321enc->feedback(enc);322}323324static void rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,325struct pipe_picture_desc *picture)326{327struct rvce_encoder *enc = (struct rvce_encoder *)encoder;328struct rvce_cpb_slot *slot = LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);329330if (!enc->dual_inst || enc->bs_idx > 1)331flush(enc);332333/* update the CPB backtrack with the just encoded frame */334slot->picture_type = enc->pic.picture_type;335slot->frame_num = enc->pic.frame_num;336slot->pic_order_cnt = enc->pic.pic_order_cnt;337if (!enc->pic.not_referenced) {338list_del(&slot->list);339list_add(&slot->list, &enc->cpb_slots);340}341}342343static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size)344{345struct rvce_encoder *enc = (struct rvce_encoder *)encoder;346struct rvid_buffer *fb = feedback;347348if (size) {349uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,350PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);351352if (ptr[1]) {353*size = ptr[4] - ptr[9];354} else {355*size = 0;356}357358enc->ws->buffer_unmap(enc->ws, fb->res->buf);359}360// dump_feedback(enc, fb);361si_vid_destroy_buffer(fb);362FREE(fb);363}364365/**366* flush any outstanding command buffers to the hardware367*/368static void rvce_flush(struct pipe_video_codec *encoder)369{370struct rvce_encoder *enc = (struct rvce_encoder *)encoder;371372flush(enc);373}374375static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)376{377// just ignored378}379380struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,381const struct pipe_video_codec *templ,382struct radeon_winsys *ws, rvce_get_buffer get_buffer)383{384struct si_screen *sscreen = (struct si_screen *)context->screen;385struct si_context *sctx = (struct si_context *)context;386struct rvce_encoder *enc;387struct pipe_video_buffer *tmp_buf, templat = {};388struct radeon_surf *tmp_surf;389unsigned cpb_size;390391if (!sscreen->info.vce_fw_version) {392RVID_ERR("Kernel doesn't supports VCE!\n");393return NULL;394395} else if (!si_vce_is_fw_version_supported(sscreen)) {396RVID_ERR("Unsupported VCE fw version loaded!\n");397return NULL;398}399400enc = CALLOC_STRUCT(rvce_encoder);401if (!enc)402return NULL;403404if (sscreen->info.is_amdgpu)405enc->use_vm = true;406if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || sscreen->info.is_amdgpu)407enc->use_vui = true;408if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY &&409sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 &&410sscreen->info.family != CHIP_VEGAM)411enc->dual_pipe = true;412/* TODO enable B frame with dual instance */413if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) &&414(sscreen->info.vce_harvest_config == 0))415enc->dual_inst = true;416417enc->base = *templ;418enc->base.context = context;419420enc->base.destroy = rvce_destroy;421enc->base.begin_frame = rvce_begin_frame;422enc->base.encode_bitstream = rvce_encode_bitstream;423enc->base.end_frame = rvce_end_frame;424enc->base.flush = rvce_flush;425enc->base.get_feedback = rvce_get_feedback;426enc->get_buffer = get_buffer;427428enc->screen = context->screen;429enc->ws = ws;430431if (!ws->cs_create(&enc->cs, sctx->ctx, RING_VCE, rvce_cs_flush, enc, false)) {432RVID_ERR("Can't get command submission context.\n");433goto error;434}435436templat.buffer_format = PIPE_FORMAT_NV12;437templat.width = enc->base.width;438templat.height = enc->base.height;439templat.interlaced = false;440if (!(tmp_buf = context->create_video_buffer(context, &templat))) {441RVID_ERR("Can't create video buffer.\n");442goto error;443}444445enc->cpb_num = get_cpb_num(enc);446if (!enc->cpb_num)447goto error;448449get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);450451cpb_size = (sscreen->info.chip_class < GFX9)452? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *453align(tmp_surf->u.legacy.level[0].nblk_y, 32)454:455456align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *457align(tmp_surf->u.gfx9.surf_height, 32);458459cpb_size = cpb_size * 3 / 2;460cpb_size = cpb_size * enc->cpb_num;461if (enc->dual_pipe)462cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;463tmp_buf->destroy(tmp_buf);464if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {465RVID_ERR("Can't create CPB buffer.\n");466goto error;467}468469enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));470if (!enc->cpb_array)471goto error;472473reset_cpb(enc);474475switch (sscreen->info.vce_fw_version) {476case FW_40_2_2:477si_vce_40_2_2_init(enc);478break;479480case FW_50_0_1:481case FW_50_1_2:482case FW_50_10_2:483case FW_50_17_3:484si_vce_50_init(enc);485break;486487case FW_52_0_3:488case FW_52_4_3:489case FW_52_8_3:490si_vce_52_init(enc);491break;492493default:494if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) {495si_vce_52_init(enc);496} else497goto error;498}499500return &enc->base;501502error:503enc->ws->cs_destroy(&enc->cs);504505si_vid_destroy_buffer(&enc->cpb);506507FREE(enc->cpb_array);508FREE(enc);509return NULL;510}511512/**513* check if kernel has the right fw version loaded514*/515bool si_vce_is_fw_version_supported(struct si_screen *sscreen)516{517switch (sscreen->info.vce_fw_version) {518case FW_40_2_2:519case FW_50_0_1:520case FW_50_1_2:521case FW_50_10_2:522case FW_50_17_3:523case FW_52_0_3:524case FW_52_4_3:525case FW_52_8_3:526return true;527default:528if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)529return true;530else531return false;532}533}534535/**536* Add the buffer as relocation to the current command submission537*/538void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage,539enum radeon_bo_domain domain, signed offset)540{541int reloc_idx;542543reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0);544if (enc->use_vm) {545uint64_t addr;546addr = enc->ws->buffer_get_virtual_address(buf);547addr = addr + offset;548RVCE_CS(addr >> 32);549RVCE_CS(addr);550} else {551offset += enc->ws->buffer_get_reloc_offset(buf);552RVCE_CS(reloc_idx * 4);553RVCE_CS(offset);554}555}556557558