Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c
4574 views
/*1* Copyright 2013 Ilia Mirkin2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "nv50/nv84_video.h"2324struct iparm {25struct iseqparm {26uint32_t chroma_format_idc; // 0027uint32_t pad[(0x128 - 0x4) / 4];28uint32_t log2_max_frame_num_minus4; // 12829uint32_t pic_order_cnt_type; // 12c30uint32_t log2_max_pic_order_cnt_lsb_minus4; // 13031uint32_t delta_pic_order_always_zero_flag; // 13432uint32_t num_ref_frames; // 13833uint32_t pic_width_in_mbs_minus1; // 13c34uint32_t pic_height_in_map_units_minus1; // 14035uint32_t frame_mbs_only_flag; // 14436uint32_t mb_adaptive_frame_field_flag; // 14837uint32_t direct_8x8_inference_flag; // 14c38} iseqparm; // 00039struct ipicparm {40uint32_t entropy_coding_mode_flag; // 0041uint32_t pic_order_present_flag; // 0442uint32_t num_slice_groups_minus1; // 0843uint32_t slice_group_map_type; // 0c44uint32_t pad1[0x60 / 4];45uint32_t u70; // 7046uint32_t u74; // 7447uint32_t u78; // 7848uint32_t num_ref_idx_l0_active_minus1; // 7c49uint32_t num_ref_idx_l1_active_minus1; // 8050uint32_t weighted_pred_flag; // 8451uint32_t weighted_bipred_idc; // 8852uint32_t pic_init_qp_minus26; // 8c53uint32_t chroma_qp_index_offset; // 9054uint32_t deblocking_filter_control_present_flag; // 9455uint32_t constrained_intra_pred_flag; // 9856uint32_t redundant_pic_cnt_present_flag; // 9c57uint32_t transform_8x8_mode_flag; // a058uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];59uint32_t second_chroma_qp_index_offset; // 1c860uint32_t u1cc; // 1cc61uint32_t curr_pic_order_cnt; // 1d062uint32_t field_order_cnt[2]; // 1d463uint32_t curr_mvidx; // 1dc64struct iref {65uint32_t u00; // 0066uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom67uint8_t is_long_term; // 0868uint8_t non_existing; // 0969uint8_t u0a; // 0a70uint8_t u0b; // 0b71uint32_t frame_idx; // 0c72uint32_t field_order_cnt[2]; // 1073uint32_t mvidx; // 1874uint8_t field_pic_flag; // 1c75uint8_t u1d; // 1d76uint8_t u1e; // 1e77uint8_t u1f; // 1f78// 2079} refs[0x10]; // 1e080} ipicparm; // 15081};8283int84nv84_decoder_bsp(struct nv84_decoder *dec,85struct pipe_h264_picture_desc *desc,86unsigned num_buffers,87const void *const *data,88const unsigned *num_bytes,89struct nv84_video_buffer *dest)90{91struct iparm params;92uint32_t more_params[0x44 / 4] = {0};93unsigned total_bytes = 0;94int i;95static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0};96char indexes[17] = {0};97struct nouveau_pushbuf *push = dec->bsp_pushbuf;98struct nouveau_pushbuf_refn bo_refs[] = {99{ dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },100{ dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },101{ dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },102{ dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },103};104105nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client);106107STATIC_ASSERT(sizeof(struct iparm) == 0x530);108109memset(¶ms, 0, sizeof(params));110111dest->frame_num = dest->frame_num_max = desc->frame_num;112113for (i = 0; i < 16; i++) {114struct iref *ref = ¶ms.ipicparm.refs[i];115struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i];116if (!frame) break;117/* The frame index is relative to the last IDR frame. So once the frame118* num goes back to 0, previous reference frames need to have a negative119* index.120*/121if (desc->frame_num >= frame->frame_num_max) {122frame->frame_num_max = desc->frame_num;123} else {124frame->frame_num -= frame->frame_num_max + 1;125frame->frame_num_max = desc->frame_num;126}127ref->non_existing = 0;128ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) |129(desc->bottom_is_reference[i] ? 2 : 0);130ref->is_long_term = desc->is_long_term[i];131ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0];132ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1];133ref->frame_idx = frame->frame_num;134ref->u00 = ref->mvidx = frame->mvidx;135ref->field_pic_flag = desc->field_pic_flag;136indexes[frame->mvidx] = 1;137}138139/* Needs to be adjusted if we ever support non-4:2:0 videos */140params.iseqparm.chroma_format_idc = 1;141142params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1;143if (desc->field_pic_flag || desc->pps->sps->mb_adaptive_frame_field_flag)144params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1;145else146params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1;147148if (desc->bottom_field_flag)149params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1];150else151params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0];152params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0];153params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1];154if (desc->is_reference) {155if (dest->mvidx < 0) {156for (i = 0; i < desc->num_ref_frames + 1; i++) {157if (!indexes[i]) {158dest->mvidx = i;159break;160}161}162assert(i != desc->num_ref_frames + 1);163}164165params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx;166}167168params.iseqparm.num_ref_frames = desc->num_ref_frames;169params.iseqparm.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag;170params.ipicparm.constrained_intra_pred_flag = desc->pps->constrained_intra_pred_flag;171params.ipicparm.weighted_pred_flag = desc->pps->weighted_pred_flag;172params.ipicparm.weighted_bipred_idc = desc->pps->weighted_bipred_idc;173params.iseqparm.frame_mbs_only_flag = desc->pps->sps->frame_mbs_only_flag;174params.ipicparm.transform_8x8_mode_flag = desc->pps->transform_8x8_mode_flag;175params.ipicparm.chroma_qp_index_offset = desc->pps->chroma_qp_index_offset;176params.ipicparm.second_chroma_qp_index_offset = desc->pps->second_chroma_qp_index_offset;177params.ipicparm.pic_init_qp_minus26 = desc->pps->pic_init_qp_minus26;178params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1;179params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1;180params.iseqparm.log2_max_frame_num_minus4 = desc->pps->sps->log2_max_frame_num_minus4;181params.iseqparm.pic_order_cnt_type = desc->pps->sps->pic_order_cnt_type;182params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->pps->sps->log2_max_pic_order_cnt_lsb_minus4;183params.iseqparm.delta_pic_order_always_zero_flag = desc->pps->sps->delta_pic_order_always_zero_flag;184params.iseqparm.direct_8x8_inference_flag = desc->pps->sps->direct_8x8_inference_flag;185params.ipicparm.entropy_coding_mode_flag = desc->pps->entropy_coding_mode_flag;186params.ipicparm.pic_order_present_flag = desc->pps->bottom_field_pic_order_in_frame_present_flag;187params.ipicparm.deblocking_filter_control_present_flag = desc->pps->deblocking_filter_control_present_flag;188params.ipicparm.redundant_pic_cnt_present_flag = desc->pps->redundant_pic_cnt_present_flag;189190memcpy(dec->bitstream->map, ¶ms, sizeof(params));191for (i = 0; i < num_buffers; i++) {192assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700);193memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]);194total_bytes += num_bytes[i];195}196memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end));197total_bytes += sizeof(end);198more_params[1] = total_bytes;199memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params));200201PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2);202nouveau_pushbuf_refn(push, bo_refs, ARRAY_SIZE(bo_refs));203204/* Wait for the fence = 1 */205BEGIN_NV04(push, SUBC_BSP(0x10), 4);206PUSH_DATAh(push, dec->fence->offset);207PUSH_DATA (push, dec->fence->offset);208PUSH_DATA (push, 1);209PUSH_DATA (push, 1);210211/* TODO: Use both halves of bitstream/vpring for alternating frames */212213/* Kick off the BSP */214BEGIN_NV04(push, SUBC_BSP(0x400), 20);215PUSH_DATA (push, dec->bitstream->offset >> 8);216PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7);217PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);218PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6);219PUSH_DATA (push, 1);220PUSH_DATA (push, dec->mbring->offset >> 8);221PUSH_DATA (push, dec->frame_size);222PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8);223PUSH_DATA (push, dec->vpring->offset >> 8);224PUSH_DATA (push, dec->vpring->size / 2);225PUSH_DATA (push, dec->vpring_residual);226PUSH_DATA (push, dec->vpring_ctrl);227PUSH_DATA (push, 0);228PUSH_DATA (push, dec->vpring_residual);229PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl);230PUSH_DATA (push, dec->vpring_deblock);231PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +232dec->vpring_residual + dec->vpring_deblock) >> 8);233PUSH_DATA (push, 0x654321);234PUSH_DATA (push, 0);235PUSH_DATA (push, 0x100008);236237BEGIN_NV04(push, SUBC_BSP(0x620), 2);238PUSH_DATA (push, 0);239PUSH_DATA (push, 0);240241BEGIN_NV04(push, SUBC_BSP(0x300), 1);242PUSH_DATA (push, 0);243244/* Write fence = 2, intr */245BEGIN_NV04(push, SUBC_BSP(0x610), 3);246PUSH_DATAh(push, dec->fence->offset);247PUSH_DATA (push, dec->fence->offset);248PUSH_DATA (push, 2);249250BEGIN_NV04(push, SUBC_BSP(0x304), 1);251PUSH_DATA (push, 0x101);252PUSH_KICK (push);253return 0;254}255256257