Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
4574 views
/*1* Copyright 2011-2013 Maarten Lankhorst2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "nvc0/nvc0_video.h"2324#if NOUVEAU_VP3_DEBUG_FENCE25static void dump_comm_bsp(struct comm *comm)26{27unsigned idx = comm->bsp_cur_index & 0xf;28debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs);29debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]);30}31#endif3233unsigned34nvc0_decoder_bsp_begin(struct nouveau_vp3_decoder *dec, unsigned comm_seq)35{36struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];37unsigned ret = 0;3839ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);40if (ret) {41debug_printf("map failed: %i %s\n", ret, strerror(-ret));42return -1;43}4445nouveau_vp3_bsp_begin(dec);4647return 2;48}4950unsigned51nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,52unsigned comm_seq, unsigned num_buffers,53const void *const *data, const unsigned *num_bytes)54{55struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];56struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];57uint32_t bsp_size = 0;58uint32_t i = 0;59unsigned ret = 0;6061bsp_size = dec->bsp_ptr - (char *)bsp_bo->map;62for (i = 0; i < num_buffers; i++)63bsp_size += num_bytes[i];64bsp_size += 256; /* the 4 end markers */6566if (bsp_size > bsp_bo->size) {67union nouveau_bo_config cfg;68struct nouveau_bo *tmp_bo = NULL;6970cfg.nvc0.tile_mode = 0x10;71cfg.nvc0.memtype = 0xfe;7273/* round up to the nearest mb */74bsp_size += (1 << 20) - 1;75bsp_size &= ~((1 << 20) - 1);7677ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);78if (ret) {79debug_printf("reallocating bsp %u -> %u failed with %i\n",80(unsigned)bsp_bo->size, bsp_size, ret);81return -1;82}8384ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);85if (ret) {86debug_printf("map failed: %i %s\n", ret, strerror(-ret));87return -1;88}8990/* Preserve previous buffer. */91/* TODO: offload this copy to the GPU, as otherwise we're reading and92* writing to VRAM. */93memcpy(tmp_bo->map, bsp_bo->map, bsp_bo->size);9495/* update position to current chunk */96dec->bsp_ptr = tmp_bo->map + (dec->bsp_ptr - (char *)bsp_bo->map);9798nouveau_bo_ref(NULL, &bsp_bo);99dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;100}101102if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {103union nouveau_bo_config cfg;104struct nouveau_bo *tmp_bo = NULL;105106cfg.nvc0.tile_mode = 0x10;107cfg.nvc0.memtype = 0xfe;108109ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);110if (ret) {111debug_printf("reallocating inter %u -> %u failed with %i\n",112inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);113return -1;114}115116ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);117if (ret) {118debug_printf("map failed: %i %s\n", ret, strerror(-ret));119return -1;120}121122nouveau_bo_ref(NULL, &inter_bo);123dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;124}125126nouveau_vp3_bsp_next(dec, num_buffers, data, num_bytes);127128return 2;129}130131132unsigned133nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,134struct nouveau_vp3_video_buffer *target, unsigned comm_seq,135unsigned *vp_caps, unsigned *is_ref,136struct nouveau_vp3_video_buffer *refs[16])137{138struct nouveau_pushbuf *push = dec->pushbuf[0];139enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);140uint32_t bsp_addr, comm_addr, inter_addr;141uint32_t slice_size, bucket_size, ring_size;142uint32_t caps;143struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];144struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];145struct nouveau_pushbuf_refn bo_refs[] = {146{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },147{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },148#if NOUVEAU_VP3_DEBUG_FENCE149{ dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },150#endif151{ dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },152};153int num_refs = ARRAY_SIZE(bo_refs);154155if (!dec->bitplane_bo)156num_refs--;157158caps = nouveau_vp3_bsp_end(dec, desc);159160nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);161162nouveau_pushbuf_space(push, 32, num_refs, 0);163nouveau_pushbuf_refn(push, bo_refs, num_refs);164165bsp_addr = bsp_bo->offset >> 8;166inter_addr = inter_bo->offset >> 8;167168#if NOUVEAU_VP3_DEBUG_FENCE169memset(dec->comm, 0, 0x200);170comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;171#else172comm_addr = bsp_addr + (COMM_OFFSET>>8);173#endif174175BEGIN_NVC0(push, SUBC_BSP(0x700), 5);176PUSH_DATA (push, caps); // 700 cmd177PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp178PUSH_DATA (push, bsp_addr + 7); // 708 str addr179PUSH_DATA (push, comm_addr); // 70c comm180PUSH_DATA (push, comm_seq); // 710 seq181182if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) {183u32 bitplane_addr;184185bitplane_addr = dec->bitplane_bo->offset >> 8;186187nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);188BEGIN_NVC0(push, SUBC_BSP(0x400), 6);189PUSH_DATA (push, bsp_addr); // 400 picparm addr190PUSH_DATA (push, inter_addr); // 404 interparm addr191PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr192PUSH_DATA (push, ring_size << 8); // 40c interdata_size193PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA194PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE195} else {196nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);197BEGIN_NVC0(push, SUBC_BSP(0x400), 8);198PUSH_DATA (push, bsp_addr); // 400 picparm addr199PUSH_DATA (push, inter_addr); // 404 interparm addr200PUSH_DATA (push, slice_size << 8); // 408 interparm size?201PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr202PUSH_DATA (push, ring_size << 8); // 410 interdata size203PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?204PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..205PUSH_DATA (push, 0); // 41c targets206// TODO: Double check 414 / 418 with nvidia trace207}208209#if NOUVEAU_VP3_DEBUG_FENCE210BEGIN_NVC0(push, SUBC_BSP(0x240), 3);211PUSH_DATAh(push, dec->fence_bo->offset);212PUSH_DATA (push, dec->fence_bo->offset);213PUSH_DATA (push, dec->fence_seq);214215BEGIN_NVC0(push, SUBC_BSP(0x300), 1);216PUSH_DATA (push, 1);217PUSH_KICK (push);218219{220unsigned spin = 0;221do {222usleep(100);223if ((spin++ & 0xff) == 0xff) {224debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]);225dump_comm_bsp(dec->comm);226}227} while (dec->fence_seq > dec->fence_map[0]);228}229230dump_comm_bsp(dec->comm);231return dec->comm->status[comm_seq & 0xf];232#else233BEGIN_NVC0(push, SUBC_BSP(0x300), 1);234PUSH_DATA (push, 0);235PUSH_KICK (push);236return 2;237#endif238}239240241