GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeon/radeon_uvd.c
1
/**************************************************************************
2
*
3
* Copyright 2011 Advanced Micro Devices, Inc.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
16
* of the Software.
17
*
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
*
26
**************************************************************************/
27
28
#include "radeon_uvd.h"
29
30
#include "pipe/p_video_codec.h"
31
#include "radeon_video.h"
32
#include "radeonsi/si_pipe.h"
33
#include "util/u_memory.h"
34
#include "util/u_video.h"
35
#include "vl/vl_defines.h"
36
#include "vl/vl_mpeg12_decoder.h"
37
#include <sys/types.h>
38
39
#include <assert.h>
40
#include <errno.h>
41
#include <stdio.h>
42
#include <unistd.h>
43
44
#define NUM_BUFFERS 4
45
46
#define NUM_MPEG2_REFS 6
47
#define NUM_H264_REFS 17
48
#define NUM_VC1_REFS 5
49
50
#define FB_BUFFER_OFFSET 0x1000
51
#define FB_BUFFER_SIZE 2048
52
#define FB_BUFFER_SIZE_TONGA (2048 * 64)
53
#define IT_SCALING_TABLE_SIZE 992
54
#define UVD_SESSION_CONTEXT_SIZE (128 * 1024)
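/* Each msg_fb_it buffer appears to be laid out as: the ruvd_msg struct at offset 0,
 * the feedback area at FB_BUFFER_OFFSET and, when the codec needs it, the IT
 * scaling tables directly after the feedback area; see map_msg_fb_it_buf(). */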
55
56
/* UVD decoder representation */
57
struct ruvd_decoder {
58
struct pipe_video_codec base;
59
60
ruvd_set_dtb set_dtb;
61
62
unsigned stream_handle;
63
unsigned stream_type;
64
unsigned frame_number;
65
66
struct pipe_screen *screen;
67
struct radeon_winsys *ws;
68
struct radeon_cmdbuf cs;
69
70
unsigned cur_buffer;
71
72
struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
73
struct ruvd_msg *msg;
74
uint32_t *fb;
75
unsigned fb_size;
76
uint8_t *it;
77
78
struct rvid_buffer bs_buffers[NUM_BUFFERS];
79
void *bs_ptr;
80
unsigned bs_size;
81
82
struct rvid_buffer dpb;
83
bool use_legacy;
84
struct rvid_buffer ctx;
85
struct rvid_buffer sessionctx;
86
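/* GPCOM VCPU register offsets; SOC15 (Vega10+) parts use different addresses,
 * selected in si_common_uvd_create_decoder() */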
struct {
87
unsigned data0;
88
unsigned data1;
89
unsigned cmd;
90
unsigned cntl;
91
} reg;
92
93
void *render_pic_list[16];
94
};
95
96
/* flush IB to the hardware */
97
static int flush(struct ruvd_decoder *dec, unsigned flags)
98
{
99
return dec->ws->cs_flush(&dec->cs, flags, NULL);
100
}
101
102
/* add a new set register command to the IB */
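/* (the packet takes the register offset in dwords, hence the reg >> 2 below) */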
103
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
104
{
105
radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0));
106
radeon_emit(&dec->cs, val);
107
}
108
109
/* send a command to the VCPU through the GPCOM registers */
110
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off,
111
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
112
{
113
int reloc_idx;
114
115
reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0);
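/* on the non-legacy (amdgpu) path the buffer's GPU virtual address is written
 * directly into DATA0/DATA1; the legacy path passes the relocation offset/index */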
116
if (!dec->use_legacy) {
117
uint64_t addr;
118
addr = dec->ws->buffer_get_virtual_address(buf);
119
addr = addr + off;
120
set_reg(dec, dec->reg.data0, addr);
121
set_reg(dec, dec->reg.data1, addr >> 32);
122
} else {
123
off += dec->ws->buffer_get_reloc_offset(buf);
124
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
125
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
126
}
127
set_reg(dec, dec->reg.cmd, cmd << 1);
128
}
129
130
/* does the codec need an IT buffer? */
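/* (the IT buffer carries the scaling-list tables that only the H.264-perf and
 * HEVC paths upload; see get_h264_msg()/get_h265_msg()) */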
131
static bool have_it(struct ruvd_decoder *dec)
132
{
133
return dec->stream_type == RUVD_CODEC_H264_PERF || dec->stream_type == RUVD_CODEC_H265;
134
}
135
136
/* map the next available message/feedback/itscaling buffer */
137
static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
138
{
139
struct rvid_buffer *buf;
140
uint8_t *ptr;
141
142
/* grab the current message/feedback buffer */
143
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
144
145
/* and map it for CPU access */
146
ptr =
147
dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
148
149
/* calc buffer offsets */
150
dec->msg = (struct ruvd_msg *)ptr;
151
memset(dec->msg, 0, sizeof(*dec->msg));
152
153
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
154
if (have_it(dec))
155
dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);
156
}
157
158
/* unmap and send a message command to the VCPU */
159
static void send_msg_buf(struct ruvd_decoder *dec)
160
{
161
struct rvid_buffer *buf;
162
163
/* ignore the request if message/feedback buffer isn't mapped */
164
if (!dec->msg || !dec->fb)
165
return;
166
167
/* grab the current message buffer */
168
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
169
170
/* unmap the buffer */
171
dec->ws->buffer_unmap(dec->ws, buf->res->buf);
172
dec->msg = NULL;
173
dec->fb = NULL;
174
dec->it = NULL;
175
176
if (dec->sessionctx.res)
177
send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0,
178
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
179
180
/* and send it to the hardware */
181
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
182
}
183
184
/* cycle to the next set of buffers */
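/* (NUM_BUFFERS sets of msg/fb/IT and bitstream buffers are cycled, presumably so
 * the next frame can be prepared while the previous submission is still in flight) */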
185
static void next_buffer(struct ruvd_decoder *dec)
186
{
187
++dec->cur_buffer;
188
dec->cur_buffer %= NUM_BUFFERS;
189
}
190
191
/* convert the profile into something UVD understands */
192
static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
193
{
194
switch (u_reduce_video_profile(dec->base.profile)) {
195
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
196
return (family >= CHIP_TONGA) ? RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
197
198
case PIPE_VIDEO_FORMAT_VC1:
199
return RUVD_CODEC_VC1;
200
201
case PIPE_VIDEO_FORMAT_MPEG12:
202
return RUVD_CODEC_MPEG2;
203
204
case PIPE_VIDEO_FORMAT_MPEG4:
205
return RUVD_CODEC_MPEG4;
206
207
case PIPE_VIDEO_FORMAT_HEVC:
208
return RUVD_CODEC_H265;
209
210
case PIPE_VIDEO_FORMAT_JPEG:
211
return RUVD_CODEC_MJPEG;
212
213
default:
214
assert(0);
215
return 0;
216
}
217
}
218
219
static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec)
220
{
221
unsigned width_in_mb, height_in_mb, ctx_size;
222
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
223
unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
224
225
unsigned max_references = dec->base.max_references + 1;
226
227
// picture width & height in 16 pixel units
228
width_in_mb = width / VL_MACROBLOCK_WIDTH;
229
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
230
231
if (!dec->use_legacy) {
232
unsigned fs_in_mb = width_in_mb * height_in_mb;
233
unsigned num_dpb_buffer;
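/* the divisors below appear to be the H.264 MaxDpbMbs limits (Table A-1) for
 * levels 3.0-5.1, so num_dpb_buffer is roughly MaxDpbMbs divided by the frame
 * size in macroblocks, plus one for the currently decoded picture */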
234
switch (dec->base.level) {
235
case 30:
236
num_dpb_buffer = 8100 / fs_in_mb;
237
break;
238
case 31:
239
num_dpb_buffer = 18000 / fs_in_mb;
240
break;
241
case 32:
242
num_dpb_buffer = 20480 / fs_in_mb;
243
break;
244
case 41:
245
num_dpb_buffer = 32768 / fs_in_mb;
246
break;
247
case 42:
248
num_dpb_buffer = 34816 / fs_in_mb;
249
break;
250
case 50:
251
num_dpb_buffer = 110400 / fs_in_mb;
252
break;
253
case 51:
254
num_dpb_buffer = 184320 / fs_in_mb;
255
break;
256
default:
257
num_dpb_buffer = 184320 / fs_in_mb;
258
break;
259
}
260
num_dpb_buffer++;
261
max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
262
ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
263
} else {
264
// the firmware seems to always assume a minimum of ref frames
265
max_references = MAX2(NUM_H264_REFS, max_references);
266
// macroblock context buffer
267
ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256);
268
}
269
270
return ctx_size;
271
}
272
273
static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)
274
{
275
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
276
unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
277
278
unsigned max_references = dec->base.max_references + 1;
279
280
if (dec->base.width * dec->base.height >= 4096 * 2000)
281
max_references = MAX2(max_references, 8);
282
else
283
max_references = MAX2(max_references, 17);
284
285
width = align(width, 16);
286
height = align(height, 16);
287
return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
288
}
289
290
static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec,
291
struct pipe_h265_picture_desc *pic)
292
{
293
unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
294
unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
295
unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
296
297
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
298
unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
299
unsigned coeff_10bit =
300
(pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
301
302
unsigned max_references = dec->base.max_references + 1;
303
304
if (dec->base.width * dec->base.height >= 4096 * 2000)
305
max_references = MAX2(max_references, 8);
306
else
307
max_references = MAX2(max_references, 17);
308
309
log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
310
pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
311
312
width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
313
height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
314
315
num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
316
context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
317
max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
318
319
cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
320
db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
321
322
return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
323
}
324
325
static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
326
{
327
if (((struct si_screen *)dec->screen)->info.family < CHIP_VEGA10)
328
return 16;
329
else
330
return 32;
331
}
332
333
/* calculate size of reference picture buffer */
334
static unsigned calc_dpb_size(struct ruvd_decoder *dec)
335
{
336
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
337
338
// always align them to MB size for dpb calculation
339
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
340
unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
341
342
// always one more for currently decoded picture
343
unsigned max_references = dec->base.max_references + 1;
344
345
// aligned size of a single frame
346
image_size = align(width, get_db_pitch_alignment(dec)) * height;
347
image_size += image_size / 2;
348
image_size = align(image_size, 1024);
349
350
// picture width & height in 16 pixel units
351
width_in_mb = width / VL_MACROBLOCK_WIDTH;
352
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
353
354
switch (u_reduce_video_profile(dec->base.profile)) {
355
case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
356
if (!dec->use_legacy) {
357
unsigned fs_in_mb = width_in_mb * height_in_mb;
358
unsigned alignment = 64, num_dpb_buffer;
359
360
if (dec->stream_type == RUVD_CODEC_H264_PERF)
361
alignment = 256;
362
switch (dec->base.level) {
363
case 30:
364
num_dpb_buffer = 8100 / fs_in_mb;
365
break;
366
case 31:
367
num_dpb_buffer = 18000 / fs_in_mb;
368
break;
369
case 32:
370
num_dpb_buffer = 20480 / fs_in_mb;
371
break;
372
case 41:
373
num_dpb_buffer = 32768 / fs_in_mb;
374
break;
375
case 42:
376
num_dpb_buffer = 34816 / fs_in_mb;
377
break;
378
case 50:
379
num_dpb_buffer = 110400 / fs_in_mb;
380
break;
381
case 51:
382
num_dpb_buffer = 184320 / fs_in_mb;
383
break;
384
default:
385
num_dpb_buffer = 184320 / fs_in_mb;
386
break;
387
}
388
num_dpb_buffer++;
389
max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
390
dpb_size = image_size * max_references;
391
if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
392
(((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) {
393
dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
394
dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
395
}
396
} else {
397
// the firmware seems to always assume a minimum of ref frames
398
max_references = MAX2(NUM_H264_REFS, max_references);
399
// reference picture buffer
400
dpb_size = image_size * max_references;
401
if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
402
(((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) {
403
// macroblock context buffer
404
dpb_size += width_in_mb * height_in_mb * max_references * 192;
405
// IT surface buffer
406
dpb_size += width_in_mb * height_in_mb * 32;
407
}
408
}
409
break;
410
}
411
412
case PIPE_VIDEO_FORMAT_HEVC:
413
if (dec->base.width * dec->base.height >= 4096 * 2000)
414
max_references = MAX2(max_references, 8);
415
else
416
max_references = MAX2(max_references, 17);
417
418
width = align(width, 16);
419
height = align(height, 16);
420
if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
421
dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) *
422
max_references;
423
else
424
dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) *
425
max_references;
426
break;
427
428
case PIPE_VIDEO_FORMAT_VC1:
429
// the firmware seems to always assume a minimum of ref frames
430
max_references = MAX2(NUM_VC1_REFS, max_references);
431
432
// reference picture buffer
433
dpb_size = image_size * max_references;
434
435
// CONTEXT_BUFFER
436
dpb_size += width_in_mb * height_in_mb * 128;
437
438
// IT surface buffer
439
dpb_size += width_in_mb * 64;
440
441
// DB surface buffer
442
dpb_size += width_in_mb * 128;
443
444
// BP
445
dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
446
break;
447
448
case PIPE_VIDEO_FORMAT_MPEG12:
449
// reference picture buffer, must be big enough for all frames
450
dpb_size = image_size * NUM_MPEG2_REFS;
451
break;
452
453
case PIPE_VIDEO_FORMAT_MPEG4:
454
// reference picture buffer
455
dpb_size = image_size * max_references;
456
457
// CM
458
dpb_size += width_in_mb * height_in_mb * 64;
459
460
// IT surface buffer
461
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
462
463
dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
464
break;
465
466
case PIPE_VIDEO_FORMAT_JPEG:
467
dpb_size = 0;
468
break;
469
470
default:
471
// something is missing here
472
assert(0);
473
474
// at least use a sane default value
475
dpb_size = 32 * 1024 * 1024;
476
break;
477
}
478
return dpb_size;
479
}
480
481
/* free associated data in the video buffer callback */
482
static void ruvd_destroy_associated_data(void *data)
483
{
484
/* NOOP, since we only use an intptr */
485
}
486
487
/* get h264 specific message bits */
488
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
489
{
490
struct ruvd_h264 result;
491
492
memset(&result, 0, sizeof(result));
493
switch (pic->base.profile) {
494
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
495
case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
496
result.profile = RUVD_H264_PROFILE_BASELINE;
497
break;
498
499
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
500
result.profile = RUVD_H264_PROFILE_MAIN;
501
break;
502
503
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
504
result.profile = RUVD_H264_PROFILE_HIGH;
505
break;
506
507
default:
508
assert(0);
509
break;
510
}
511
512
result.level = dec->base.level;
513
514
result.sps_info_flags = 0;
515
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
516
result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
517
result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
518
result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
519
520
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
521
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
522
result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
523
result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
524
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
525
526
switch (dec->base.chroma_format) {
527
case PIPE_VIDEO_CHROMA_FORMAT_NONE:
528
/* TODO: assert? */
529
break;
530
case PIPE_VIDEO_CHROMA_FORMAT_400:
531
result.chroma_format = 0;
532
break;
533
case PIPE_VIDEO_CHROMA_FORMAT_420:
534
result.chroma_format = 1;
535
break;
536
case PIPE_VIDEO_CHROMA_FORMAT_422:
537
result.chroma_format = 2;
538
break;
539
case PIPE_VIDEO_CHROMA_FORMAT_444:
540
result.chroma_format = 3;
541
break;
542
}
543
544
result.pps_info_flags = 0;
545
result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
546
result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
547
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
548
result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
549
result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
550
result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
551
result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
552
result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
553
554
result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
555
result.slice_group_map_type = pic->pps->slice_group_map_type;
556
result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
557
result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
558
result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
559
result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
560
561
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16);
562
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64);
563
564
if (dec->stream_type == RUVD_CODEC_H264_PERF) {
565
memcpy(dec->it, result.scaling_list_4x4, 6 * 16);
566
memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64);
567
}
568
569
result.num_ref_frames = pic->num_ref_frames;
570
571
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
572
result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
573
574
result.frame_num = pic->frame_num;
575
memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16);
576
result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
577
result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
578
memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2);
579
580
result.decoded_pic_idx = pic->frame_num;
581
582
return result;
583
}
584
585
/* get h265 specific message bits */
586
static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
587
struct pipe_h265_picture_desc *pic)
588
{
589
struct ruvd_h265 result;
590
unsigned i, j;
591
592
memset(&result, 0, sizeof(result));
593
594
result.sps_info_flags = 0;
595
result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
596
result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
597
result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
598
result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
599
result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
600
result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
601
result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
602
result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
603
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
604
if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)
605
result.sps_info_flags |= 1 << 9;
606
if (pic->UseRefPicList == true)
607
result.sps_info_flags |= 1 << 10;
608
609
result.chroma_format = pic->pps->sps->chroma_format_idc;
610
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
611
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
612
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
613
result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
614
result.log2_min_luma_coding_block_size_minus3 =
615
pic->pps->sps->log2_min_luma_coding_block_size_minus3;
616
result.log2_diff_max_min_luma_coding_block_size =
617
pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
618
result.log2_min_transform_block_size_minus2 =
619
pic->pps->sps->log2_min_transform_block_size_minus2;
620
result.log2_diff_max_min_transform_block_size =
621
pic->pps->sps->log2_diff_max_min_transform_block_size;
622
result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
623
result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
624
result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
625
result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
626
result.log2_min_pcm_luma_coding_block_size_minus3 =
627
pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
628
result.log2_diff_max_min_pcm_luma_coding_block_size =
629
pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
630
result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
631
632
result.pps_info_flags = 0;
633
result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
634
result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
635
result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
636
result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
637
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
638
result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
639
result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
640
result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
641
result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
642
result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
643
result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
644
result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
645
result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
646
result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
647
result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
648
result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
649
result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
650
result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
651
result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
652
result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
653
// result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
654
655
result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
656
result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
657
result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
658
result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
659
result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
660
result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
661
result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
662
result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
663
result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
664
result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
665
result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
666
result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
667
result.init_qp_minus26 = pic->pps->init_qp_minus26;
668
669
for (i = 0; i < 19; ++i)
670
result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
671
672
for (i = 0; i < 21; ++i)
673
result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
674
675
result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
676
result.curr_poc = pic->CurrPicOrderCntVal;
677
678
for (i = 0; i < 16; i++) {
679
for (j = 0; (j < 16) && (pic->ref[j] != NULL); j++) {
680
if (dec->render_pic_list[i] == pic->ref[j])
681
break;
682
if (j == 15)
683
dec->render_pic_list[i] = NULL;
684
else if (pic->ref[j + 1] == NULL)
685
dec->render_pic_list[i] = NULL;
686
}
687
}
688
for (i = 0; i < 16; i++) {
689
if (dec->render_pic_list[i] == NULL) {
690
dec->render_pic_list[i] = target;
691
result.curr_idx = i;
692
break;
693
}
694
}
695
696
vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx,
697
&ruvd_destroy_associated_data);
698
699
for (i = 0; i < 16; ++i) {
700
struct pipe_video_buffer *ref = pic->ref[i];
701
uintptr_t ref_pic = 0;
702
703
result.poc_list[i] = pic->PicOrderCntVal[i];
704
705
if (ref)
706
ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
707
else
708
ref_pic = 0x7F;
709
result.ref_pic_list[i] = ref_pic;
710
}
711
712
for (i = 0; i < 8; ++i) {
713
result.ref_pic_set_st_curr_before[i] = 0xFF;
714
result.ref_pic_set_st_curr_after[i] = 0xFF;
715
result.ref_pic_set_lt_curr[i] = 0xFF;
716
}
717
718
for (i = 0; i < pic->NumPocStCurrBefore; ++i)
719
result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
720
721
for (i = 0; i < pic->NumPocStCurrAfter; ++i)
722
result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
723
724
for (i = 0; i < pic->NumPocLtCurr; ++i)
725
result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
726
727
for (i = 0; i < 6; ++i)
728
result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
729
730
for (i = 0; i < 2; ++i)
731
result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
732
733
memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
734
memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
735
memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
736
memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
737
738
for (i = 0; i < 2; i++) {
739
for (j = 0; j < 15; j++)
740
result.direct_reflist[i][j] = pic->RefPicList[i][j];
741
}
742
743
if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
744
if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
745
result.p010_mode = 1;
746
result.msb_mode = 1;
747
} else {
748
result.luma_10to8 = 5;
749
result.chroma_10to8 = 5;
750
result.sclr_luma10to8 = 4;
751
result.sclr_chroma10to8 = 4;
752
}
753
}
754
755
/* TODO
756
result.highestTid;
757
result.isNonRef;
758
759
IDRPicFlag;
760
RAPPicFlag;
761
NumPocTotalCurr;
762
NumShortTermPictureSliceHeaderBits;
763
NumLongTermPictureSliceHeaderBits;
764
765
IsLongTerm[16];
766
*/
767
768
return result;
769
}
770
771
/* get vc1 specific message bits */
772
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
773
{
774
struct ruvd_vc1 result;
775
776
memset(&result, 0, sizeof(result));
777
778
switch (pic->base.profile) {
779
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
780
result.profile = RUVD_VC1_PROFILE_SIMPLE;
781
result.level = 1;
782
break;
783
784
case PIPE_VIDEO_PROFILE_VC1_MAIN:
785
result.profile = RUVD_VC1_PROFILE_MAIN;
786
result.level = 2;
787
break;
788
789
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
790
result.profile = RUVD_VC1_PROFILE_ADVANCED;
791
result.level = 4;
792
break;
793
794
default:
795
assert(0);
796
}
797
798
/* fields common for all profiles */
799
result.sps_info_flags |= pic->postprocflag << 7;
800
result.sps_info_flags |= pic->pulldown << 6;
801
result.sps_info_flags |= pic->interlace << 5;
802
result.sps_info_flags |= pic->tfcntrflag << 4;
803
result.sps_info_flags |= pic->finterpflag << 3;
804
result.sps_info_flags |= pic->psf << 1;
805
806
result.pps_info_flags |= pic->range_mapy_flag << 31;
807
result.pps_info_flags |= pic->range_mapy << 28;
808
result.pps_info_flags |= pic->range_mapuv_flag << 27;
809
result.pps_info_flags |= pic->range_mapuv << 24;
810
result.pps_info_flags |= pic->multires << 21;
811
result.pps_info_flags |= pic->maxbframes << 16;
812
result.pps_info_flags |= pic->overlap << 11;
813
result.pps_info_flags |= pic->quantizer << 9;
814
result.pps_info_flags |= pic->panscan_flag << 7;
815
result.pps_info_flags |= pic->refdist_flag << 6;
816
result.pps_info_flags |= pic->vstransform << 0;
817
818
/* some fields only apply to main/advanced profile */
819
if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
820
result.pps_info_flags |= pic->syncmarker << 20;
821
result.pps_info_flags |= pic->rangered << 19;
822
result.pps_info_flags |= pic->loopfilter << 5;
823
result.pps_info_flags |= pic->fastuvmc << 4;
824
result.pps_info_flags |= pic->extended_mv << 3;
825
result.pps_info_flags |= pic->extended_dmv << 8;
826
result.pps_info_flags |= pic->dquant << 1;
827
}
828
829
result.chroma_format = 1;
830
831
#if 0
832
//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
833
uint32_t slice_count
834
uint8_t picture_type
835
uint8_t frame_coding_mode
836
uint8_t deblockEnable
837
uint8_t pquant
838
#endif
839
840
return result;
841
}
842
843
/* extract the frame number from a referenced video buffer */
844
static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
845
{
846
uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
847
uint32_t max = MAX2(dec->frame_number, 1) - 1;
848
uintptr_t frame;
849
850
/* seems to be the most sane fallback */
851
if (!ref)
852
return max;
853
854
/* get the frame number from the associated data */
855
frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
856
857
/* limit the frame number to a valid range */
858
return MAX2(MIN2(frame, max), min);
859
}
860
861
/* get mpeg2 specific msg bits */
862
static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
863
struct pipe_mpeg12_picture_desc *pic)
864
{
865
const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
866
struct ruvd_mpeg2 result;
867
unsigned i;
868
869
memset(&result, 0, sizeof(result));
870
result.decoded_pic_idx = dec->frame_number;
871
for (i = 0; i < 2; ++i)
872
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
873
874
if (pic->intra_matrix) {
875
result.load_intra_quantiser_matrix = 1;
876
for (i = 0; i < 64; ++i) {
877
result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
878
}
879
}
880
if (pic->non_intra_matrix) {
881
result.load_nonintra_quantiser_matrix = 1;
882
for (i = 0; i < 64; ++i) {
883
result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
884
}
885
}
886
887
result.profile_and_level_indication = 0;
888
result.chroma_format = 0x1;
889
890
result.picture_coding_type = pic->picture_coding_type;
891
result.f_code[0][0] = pic->f_code[0][0] + 1;
892
result.f_code[0][1] = pic->f_code[0][1] + 1;
893
result.f_code[1][0] = pic->f_code[1][0] + 1;
894
result.f_code[1][1] = pic->f_code[1][1] + 1;
895
result.intra_dc_precision = pic->intra_dc_precision;
896
result.pic_structure = pic->picture_structure;
897
result.top_field_first = pic->top_field_first;
898
result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
899
result.concealment_motion_vectors = pic->concealment_motion_vectors;
900
result.q_scale_type = pic->q_scale_type;
901
result.intra_vlc_format = pic->intra_vlc_format;
902
result.alternate_scan = pic->alternate_scan;
903
904
return result;
905
}
906
907
/* get mpeg4 specific msg bits */
908
static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
909
struct pipe_mpeg4_picture_desc *pic)
910
{
911
struct ruvd_mpeg4 result;
912
unsigned i;
913
914
memset(&result, 0, sizeof(result));
915
result.decoded_pic_idx = dec->frame_number;
916
for (i = 0; i < 2; ++i)
917
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
918
919
result.variant_type = 0;
920
result.profile_and_level_indication = 0xF0; // ASP Level0
921
922
result.video_object_layer_verid = 0x5; // advanced simple
923
result.video_object_layer_shape = 0x0; // rectangular
924
925
result.video_object_layer_width = dec->base.width;
926
result.video_object_layer_height = dec->base.height;
927
928
result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
929
930
result.flags |= pic->short_video_header << 0;
931
// result.flags |= obmc_disable << 1;
932
result.flags |= pic->interlaced << 2;
933
result.flags |= 1 << 3; // load_intra_quant_mat
934
result.flags |= 1 << 4; // load_nonintra_quant_mat
935
result.flags |= pic->quarter_sample << 5;
936
result.flags |= 1 << 6; // complexity_estimation_disable
937
result.flags |= pic->resync_marker_disable << 7;
938
// result.flags |= data_partitioned << 8;
939
// result.flags |= reversible_vlc << 9;
940
result.flags |= 0 << 10; // newpred_enable
941
result.flags |= 0 << 11; // reduced_resolution_vop_enable
942
// result.flags |= scalability << 12;
943
// result.flags |= is_object_layer_identifier << 13;
944
// result.flags |= fixed_vop_rate << 14;
945
// result.flags |= newpred_segment_type << 15;
946
947
result.quant_type = pic->quant_type;
948
949
for (i = 0; i < 64; ++i) {
950
result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
951
result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
952
}
953
954
/*
955
int32_t trd [2]
956
int32_t trb [2]
957
uint8_t vop_coding_type
958
uint8_t vop_fcode_forward
959
uint8_t vop_fcode_backward
960
uint8_t rounding_control
961
uint8_t alternate_vertical_scan_flag
962
uint8_t top_field_first
963
*/
964
965
return result;
966
}
967
968
/**
969
* destroy this video decoder
970
*/
971
static void ruvd_destroy(struct pipe_video_codec *decoder)
972
{
973
struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;
974
unsigned i;
975
976
assert(decoder);
977
978
map_msg_fb_it_buf(dec);
979
dec->msg->size = sizeof(*dec->msg);
980
dec->msg->msg_type = RUVD_MSG_DESTROY;
981
dec->msg->stream_handle = dec->stream_handle;
982
send_msg_buf(dec);
983
984
flush(dec, 0);
985
986
dec->ws->cs_destroy(&dec->cs);
987
988
for (i = 0; i < NUM_BUFFERS; ++i) {
989
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
990
si_vid_destroy_buffer(&dec->bs_buffers[i]);
991
}
992
993
si_vid_destroy_buffer(&dec->dpb);
994
si_vid_destroy_buffer(&dec->ctx);
995
si_vid_destroy_buffer(&dec->sessionctx);
996
997
FREE(dec);
998
}
999
1000
/**
1001
* start decoding of a new frame
1002
*/
1003
static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
1004
struct pipe_picture_desc *picture)
1005
{
1006
struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;
1007
uintptr_t frame;
1008
1009
assert(decoder);
1010
1011
frame = ++dec->frame_number;
1012
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
1013
&ruvd_destroy_associated_data);
1014
1015
dec->bs_size = 0;
1016
dec->bs_ptr = dec->ws->buffer_map(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs,
1017
PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
1018
}
1019
1020
/**
1021
* decode a macroblock
1022
*/
1023
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
1024
struct pipe_video_buffer *target,
1025
struct pipe_picture_desc *picture,
1026
const struct pipe_macroblock *macroblocks,
1027
unsigned num_macroblocks)
1028
{
1029
/* not supported (yet) */
1030
assert(0);
1031
}
1032
1033
/**
1034
* decode a bitstream
1035
*/
1036
static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
1037
struct pipe_video_buffer *target,
1038
struct pipe_picture_desc *picture, unsigned num_buffers,
1039
const void *const *buffers, const unsigned *sizes)
1040
{
1041
struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;
1042
unsigned i;
1043
1044
assert(decoder);
1045
1046
if (!dec->bs_ptr)
1047
return;
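/* append each chunk to the current bitstream buffer; bs_ptr/bs_size persist across
 * calls, so repeated decode_bitstream() calls accumulate data that is finally
 * submitted in ruvd_end_frame() */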
1048
1049
for (i = 0; i < num_buffers; ++i) {
1050
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
1051
unsigned new_size = dec->bs_size + sizes[i];
1052
1053
if (new_size > buf->res->buf->size) {
1054
dec->ws->buffer_unmap(dec->ws, buf->res->buf);
1055
if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) {
1056
RVID_ERR("Can't resize bitstream buffer!");
1057
return;
1058
}
1059
1060
dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
1061
PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
1062
if (!dec->bs_ptr)
1063
return;
1064
1065
dec->bs_ptr += dec->bs_size;
1066
}
1067
1068
memcpy(dec->bs_ptr, buffers[i], sizes[i]);
1069
dec->bs_size += sizes[i];
1070
dec->bs_ptr += sizes[i];
1071
}
1072
}
1073
1074
/**
1075
* end decoding of the current frame
1076
*/
1077
static void ruvd_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
1078
struct pipe_picture_desc *picture)
1079
{
1080
struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;
1081
struct pb_buffer *dt;
1082
struct rvid_buffer *msg_fb_it_buf, *bs_buf;
1083
unsigned bs_size;
1084
1085
assert(decoder);
1086
1087
if (!dec->bs_ptr)
1088
return;
1089
1090
msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
1091
bs_buf = &dec->bs_buffers[dec->cur_buffer];
1092
1093
bs_size = align(dec->bs_size, 128);
1094
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
1095
dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
1096
1097
map_msg_fb_it_buf(dec);
1098
dec->msg->size = sizeof(*dec->msg);
1099
dec->msg->msg_type = RUVD_MSG_DECODE;
1100
dec->msg->stream_handle = dec->stream_handle;
1101
dec->msg->status_report_feedback_number = dec->frame_number;
1102
1103
dec->msg->body.decode.stream_type = dec->stream_type;
1104
dec->msg->body.decode.decode_flags = 0x1;
1105
dec->msg->body.decode.width_in_samples = dec->base.width;
1106
dec->msg->body.decode.height_in_samples = dec->base.height;
1107
1108
if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
1109
(picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
1110
dec->msg->body.decode.width_in_samples =
1111
align(dec->msg->body.decode.width_in_samples, 16) / 16;
1112
dec->msg->body.decode.height_in_samples =
1113
align(dec->msg->body.decode.height_in_samples, 16) / 16;
1114
}
1115
1116
if (dec->dpb.res)
1117
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
1118
dec->msg->body.decode.bsd_size = bs_size;
1119
dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
1120
1121
if (dec->stream_type == RUVD_CODEC_H264_PERF &&
1122
((struct si_screen *)dec->screen)->info.family >= CHIP_POLARIS10)
1123
dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
1124
1125
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
1126
if (((struct si_screen *)dec->screen)->info.family >= CHIP_STONEY)
1127
dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
1128
1129
switch (u_reduce_video_profile(picture->profile)) {
1130
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1131
dec->msg->body.decode.codec.h264 =
1132
get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture);
1133
break;
1134
1135
case PIPE_VIDEO_FORMAT_HEVC:
1136
dec->msg->body.decode.codec.h265 =
1137
get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture);
1138
if (dec->ctx.res == NULL) {
1139
unsigned ctx_size;
1140
if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
1141
ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture);
1142
else
1143
ctx_size = calc_ctx_size_h265_main(dec);
1144
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
1145
RVID_ERR("Can't allocate context buffer.\n");
1146
}
1147
si_vid_clear_buffer(decoder->context, &dec->ctx);
1148
}
1149
1150
if (dec->ctx.res)
1151
dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
1152
break;
1153
1154
case PIPE_VIDEO_FORMAT_VC1:
1155
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture);
1156
break;
1157
1158
case PIPE_VIDEO_FORMAT_MPEG12:
1159
dec->msg->body.decode.codec.mpeg2 =
1160
get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture);
1161
break;
1162
1163
case PIPE_VIDEO_FORMAT_MPEG4:
1164
dec->msg->body.decode.codec.mpeg4 =
1165
get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture);
1166
break;
1167
1168
case PIPE_VIDEO_FORMAT_JPEG:
1169
break;
1170
1171
default:
1172
assert(0);
1173
return;
1174
}
1175
1176
dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
1177
dec->msg->body.decode.extension_support = 0x1;
1178
1179
/* set at least the feedback buffer size */
1180
dec->fb[0] = dec->fb_size;
1181
1182
send_msg_buf(dec);
1183
1184
if (dec->dpb.res)
1185
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
1186
RADEON_DOMAIN_VRAM);
1187
1188
if (dec->ctx.res)
1189
send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,
1190
RADEON_DOMAIN_VRAM);
1191
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ,
1192
RADEON_DOMAIN_GTT);
1193
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
1194
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, FB_BUFFER_OFFSET,
1195
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
1196
if (have_it(dec))
1197
send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
1198
FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
1199
set_reg(dec, dec->reg.cntl, 1);
1200
1201
flush(dec, PIPE_FLUSH_ASYNC);
1202
next_buffer(dec);
1203
}
1204
1205
/**
1206
* flush any outstanding command buffers to the hardware
1207
*/
1208
static void ruvd_flush(struct pipe_video_codec *decoder)
1209
{
1210
}
1211
1212
/**
1213
* create a UVD decoder
1214
*/
1215
struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
1216
const struct pipe_video_codec *templ,
1217
ruvd_set_dtb set_dtb)
1218
{
1219
struct si_context *sctx = (struct si_context *)context;
1220
struct radeon_winsys *ws = sctx->ws;
1221
unsigned dpb_size;
1222
unsigned width = templ->width, height = templ->height;
1223
unsigned bs_buf_size;
1224
struct ruvd_decoder *dec;
1225
int r, i;
1226
1227
switch (u_reduce_video_profile(templ->profile)) {
1228
case PIPE_VIDEO_FORMAT_MPEG12:
1229
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
1230
return vl_create_mpeg12_decoder(context, templ);
1231
1232
FALLTHROUGH;
1233
case PIPE_VIDEO_FORMAT_MPEG4:
1234
width = align(width, VL_MACROBLOCK_WIDTH);
1235
height = align(height, VL_MACROBLOCK_HEIGHT);
1236
break;
1237
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1238
width = align(width, VL_MACROBLOCK_WIDTH);
1239
height = align(height, VL_MACROBLOCK_HEIGHT);
1240
break;
1241
1242
default:
1243
break;
1244
}
1245
1246
dec = CALLOC_STRUCT(ruvd_decoder);
1247
1248
if (!dec)
1249
return NULL;
1250
1251
if (!sctx->screen->info.is_amdgpu)
1252
dec->use_legacy = true;
1253
1254
dec->base = *templ;
1255
dec->base.context = context;
1256
dec->base.width = width;
1257
dec->base.height = height;
1258
1259
dec->base.destroy = ruvd_destroy;
1260
dec->base.begin_frame = ruvd_begin_frame;
1261
dec->base.decode_macroblock = ruvd_decode_macroblock;
1262
dec->base.decode_bitstream = ruvd_decode_bitstream;
1263
dec->base.end_frame = ruvd_end_frame;
1264
dec->base.flush = ruvd_flush;
1265
1266
dec->stream_type = profile2stream_type(dec, sctx->family);
1267
dec->set_dtb = set_dtb;
1268
dec->stream_handle = si_vid_alloc_stream_handle();
1269
dec->screen = context->screen;
1270
dec->ws = ws;
1271
1272
if (!ws->cs_create(&dec->cs, sctx->ctx, RING_UVD, NULL, NULL, false)) {
1273
RVID_ERR("Can't get command submission context.\n");
1274
goto error;
1275
}
1276
1277
for (i = 0; i < 16; i++)
1278
dec->render_pic_list[i] = NULL;
1279
dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
1280
bs_buf_size = width * height * (512 / (16 * 16));
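/* i.e. width * height * 2 bytes as the initial bitstream buffer size; the buffer
 * is grown on demand in ruvd_decode_bitstream() if a frame does not fit */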
1281
for (i = 0; i < NUM_BUFFERS; ++i) {
1282
unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
1283
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
1284
if (have_it(dec))
1285
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
1286
if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], msg_fb_it_size,
1287
PIPE_USAGE_STAGING)) {
1288
RVID_ERR("Can't allocate message buffers.\n");
1289
goto error;
1290
}
1291
1292
if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size,
1293
PIPE_USAGE_STAGING)) {
1294
RVID_ERR("Can't allocate bitstream buffers.\n");
1295
goto error;
1296
}
1297
1298
si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
1299
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
1300
}
1301
1302
dpb_size = calc_dpb_size(dec);
1303
if (dpb_size) {
1304
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
1305
RVID_ERR("Can't allocate dpb.\n");
1306
goto error;
1307
}
1308
si_vid_clear_buffer(context, &dec->dpb);
1309
}
1310
1311
if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) {
1312
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
1313
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
1314
RVID_ERR("Can't allocate context buffer.\n");
1315
goto error;
1316
}
1317
si_vid_clear_buffer(context, &dec->ctx);
1318
}
1319
1320
if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) {
1321
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, UVD_SESSION_CONTEXT_SIZE,
1322
PIPE_USAGE_DEFAULT)) {
1323
RVID_ERR("Can't allocate session ctx.\n");
1324
goto error;
1325
}
1326
si_vid_clear_buffer(context, &dec->sessionctx);
1327
}
1328
1329
if (sctx->family >= CHIP_VEGA10) {
1330
dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
1331
dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
1332
dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
1333
dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15;
1334
} else {
1335
dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
1336
dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
1337
dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
1338
dec->reg.cntl = RUVD_ENGINE_CNTL;
1339
}
1340
1341
map_msg_fb_it_buf(dec);
1342
dec->msg->size = sizeof(*dec->msg);
1343
dec->msg->msg_type = RUVD_MSG_CREATE;
1344
dec->msg->stream_handle = dec->stream_handle;
1345
dec->msg->body.create.stream_type = dec->stream_type;
1346
dec->msg->body.create.width_in_samples = dec->base.width;
1347
dec->msg->body.create.height_in_samples = dec->base.height;
1348
dec->msg->body.create.dpb_size = dpb_size;
1349
send_msg_buf(dec);
1350
r = flush(dec, 0);
1351
if (r)
1352
goto error;
1353
1354
next_buffer(dec);
1355
1356
return &dec->base;
1357
1358
error:
1359
dec->ws->cs_destroy(&dec->cs);
1360
1361
for (i = 0; i < NUM_BUFFERS; ++i) {
1362
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
1363
si_vid_destroy_buffer(&dec->bs_buffers[i]);
1364
}
1365
1366
si_vid_destroy_buffer(&dec->dpb);
1367
si_vid_destroy_buffer(&dec->ctx);
1368
si_vid_destroy_buffer(&dec->sessionctx);
1369
1370
FREE(dec);
1371
1372
return NULL;
1373
}
1374
1375
/* calculate top/bottom offset */
1376
static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,
1377
enum ruvd_surface_type type)
1378
{
1379
switch (type) {
1380
default:
1381
case RUVD_SURFACE_TYPE_LEGACY:
1382
return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
1383
layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
1384
break;
1385
case RUVD_SURFACE_TYPE_GFX9:
1386
return surface->u.gfx9.surf_offset + layer * surface->u.gfx9.surf_slice_size;
1387
break;
1388
}
1389
}
1390
1391
/* encode the macro tile aspect ratio the way the hw expects it */
1392
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
1393
{
1394
switch (macro_tile_aspect) {
1395
default:
1396
case 1:
1397
macro_tile_aspect = 0;
1398
break;
1399
case 2:
1400
macro_tile_aspect = 1;
1401
break;
1402
case 4:
1403
macro_tile_aspect = 2;
1404
break;
1405
case 8:
1406
macro_tile_aspect = 3;
1407
break;
1408
}
1409
return macro_tile_aspect;
1410
}
1411
1412
/* encode the bank width and height the way the hw expects it */
1413
static unsigned bank_wh(unsigned bankwh)
1414
{
1415
switch (bankwh) {
1416
default:
1417
case 1:
1418
bankwh = 0;
1419
break;
1420
case 2:
1421
bankwh = 1;
1422
break;
1423
case 4:
1424
bankwh = 2;
1425
break;
1426
case 8:
1427
bankwh = 3;
1428
break;
1429
}
1430
return bankwh;
1431
}
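/* both helpers above effectively encode log2() of the tiling parameter (1/2/4/8 -> 0-3) */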
1432
1433
/**
1434
* fill decoding target field from the luma and chroma surfaces
1435
*/
1436
void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
1437
struct radeon_surf *chroma, enum ruvd_surface_type type)
1438
{
1439
switch (type) {
1440
default:
1441
case RUVD_SURFACE_TYPE_LEGACY:
1442
msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
1443
switch (luma->u.legacy.level[0].mode) {
1444
case RADEON_SURF_MODE_LINEAR_ALIGNED:
1445
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
1446
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
1447
break;
1448
case RADEON_SURF_MODE_1D:
1449
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1450
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
1451
break;
1452
case RADEON_SURF_MODE_2D:
1453
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1454
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
1455
break;
1456
default:
1457
assert(0);
1458
break;
1459
}
1460
1461
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
1462
if (chroma)
1463
msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
1464
if (msg->body.decode.dt_field_mode) {
1465
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
1466
if (chroma)
1467
msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
1468
} else {
1469
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
1470
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
1471
}
1472
1473
if (chroma) {
1474
assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
1475
assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
1476
assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
1477
}
1478
1479
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
1480
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
1481
msg->body.decode.dt_surf_tile_config |=
1482
RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
1483
break;
1484
case RUVD_SURFACE_TYPE_GFX9:
1485
msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w;
1486
/* SWIZZLE LINEAR MODE */
1487
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
1488
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
1489
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
1490
msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
1491
if (msg->body.decode.dt_field_mode) {
1492
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
1493
msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
1494
} else {
1495
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
1496
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
1497
}
1498
msg->body.decode.dt_surf_tile_config = 0;
1499
break;
1500
}
1501
}
1502
1503