CoCalc -- nv84_video

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
8498 views
1
/*
 * Copyright 2013 Ilia Mirkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22

23
#include "nv50/nv84_video.h"
24

25
#include "util/u_sse.h"
26

27
/*
 * First H.264 parameter block used by the VP stage.
 *
 * nv84_decoder_vp_h264() zero-fills one of these, sets the fields below and
 * copies it to the start of dec->vp_params->map. The layout is fixed: the
 * function asserts sizeof(struct h264_iparm1) == 0x218, and the trailing
 * comments give each field's byte offset. Fields named unk* are never
 * written by this file and therefore stay zero.
 */
struct h264_iparm1 {
   uint8_t scaling_lists_4x4[6][16]; // 00 -- copied from pps->ScalingList4x4
   uint8_t scaling_lists_8x8[2][64]; // 60 -- copied from pps->ScalingList8x8
   uint32_t width; // e0 -- destination width aligned to 16
   uint32_t height; // e4 -- destination height aligned to 16
   uint64_t ref1_addrs[16]; // e8 -- "interlaced" bo offset per reference slot
   uint64_t ref2_addrs[16]; // 168 -- "full" bo offset per reference slot
   uint32_t unk1e8; // 1e8
   uint32_t unk1ec; // 1ec
   uint32_t w1; // 1f0 -- width aligned to 64
   uint32_t w2; // 1f4 -- width aligned to 64
   uint32_t w3; // 1f8 -- width aligned to 64
   uint32_t h1; // 1fc -- height aligned to 32
   uint32_t h2; // 200 -- height
   uint32_t h3; // 204 -- height aligned to 32
   uint32_t mb_adaptive_frame_field_flag; // 208
   uint32_t field_pic_flag; // 20c
   uint32_t format; // 210 -- 0x3231564e, i.e. 'NV12'
   uint32_t unk214; // 214
};
47

48
/*
 * Second H.264 parameter block used by the VP stage.
 *
 * nv84_decoder_vp_h264() zero-fills one of these, sets the fields below and
 * copies it to dec->vp_params->map + 0x400. The layout is fixed: the
 * function asserts sizeof(struct h264_iparm2) == 0x38, and the trailing
 * comments give each field's byte offset.
 */
struct h264_iparm2 {
   uint32_t width; // 00 -- destination width aligned to 16
   uint32_t height; // 04 -- aligned height; align(height, 32) / 2 for field pictures
   uint32_t mbs; // 08 -- (width * height) >> 8, i.e. number of 16x16 macroblocks
   uint32_t w1; // 0c -- width aligned to 64
   uint32_t w2; // 10 -- width aligned to 64
   uint32_t w3; // 14 -- width aligned to 64
   uint32_t h1; // 18 -- height aligned to 32
   uint32_t h2; // 1c -- height aligned to 32
   uint32_t h3; // 20 -- height
   uint32_t unk24; // 24 -- never written, stays zero
   uint32_t mb_adaptive_frame_field_flag; // 28
   uint32_t top; // 2c -- field pictures only: 2 for a bottom field, else 1
   uint32_t bottom; // 30 -- field pictures only: bottom_field_flag
   uint32_t is_reference; // 34
};
64

65
void
66
nv84_decoder_vp_h264(struct nv84_decoder *dec,
67
                     struct pipe_h264_picture_desc *desc,
68
                     struct nv84_video_buffer *dest)
69
{
70
   struct h264_iparm1 param1;
71
   struct h264_iparm2 param2;
72
   int i, width = align(dest->base.width, 16),
73
      height = align(dest->base.height, 16);
74

75
   struct nouveau_pushbuf *push = dec->vp_pushbuf;
76
   struct nouveau_pushbuf_refn bo_refs[] = {
77
      { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
78
      { dest->full, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
79
      { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
80
      { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
81
      { dec->vp_params, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
82
      { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
83
   };
84
   int num_refs = ARRAY_SIZE(bo_refs);
85
   bool is_ref = desc->is_reference;
86

87
   STATIC_ASSERT(sizeof(struct h264_iparm1) == 0x218);
88
   STATIC_ASSERT(sizeof(struct h264_iparm2) == 0x38);
89

90
   memset(&param1, 0, sizeof(param1));
91
   memset(&param2, 0, sizeof(param2));
92

93
   memcpy(&param1.scaling_lists_4x4, desc->pps->ScalingList4x4,
94
          sizeof(param1.scaling_lists_4x4));
95
   memcpy(&param1.scaling_lists_8x8, desc->pps->ScalingList8x8,
96
          sizeof(param1.scaling_lists_8x8));
97

98
   param1.width = width;
99
   param1.w1 = param1.w2 = param1.w3 = align(width, 64);
100
   param1.height = param1.h2 = height;
101
   param1.h1 = param1.h3 = align(height, 32);
102
   param1.format = 0x3231564e; /* 'NV12' */
103
   param1.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag;
104
   param1.field_pic_flag = desc->field_pic_flag;
105

106
   param2.width = width;
107
   param2.w1 = param2.w2 = param2.w3 = param1.w1;
108
   if (desc->field_pic_flag)
109
      param2.height = align(height, 32) / 2;
110
   else
111
      param2.height = height;
112
   param2.h1 = param2.h2 = align(height, 32);
113
   param2.h3 = height;
114
   param2.mbs = width * height >> 8;
115
   if (desc->field_pic_flag) {
116
      param2.top = desc->bottom_field_flag ? 2 : 1;
117
      param2.bottom = desc->bottom_field_flag;
118
   }
119
   param2.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag;
120
   param2.is_reference = desc->is_reference;
121

122
   PUSH_SPACE(push, 5 + 16 + 3 + 2 + 6 + (is_ref ? 2 : 0) + 3 + 2 + 4 + 2);
123

124
   struct nouveau_bo *ref2_default = dest->full;
125

126
   for (i = 0; i < 16; i++) {
127
      struct nv84_video_buffer *buf = (struct nv84_video_buffer *)desc->ref[i];
128
      struct nouveau_bo *bo1, *bo2;
129
      if (buf) {
130
         bo1 = buf->interlaced;
131
         bo2 = buf->full;
132
         if (i == 0)
133
            ref2_default = buf->full;
134
      } else {
135
         bo1 = dest->interlaced;
136
         bo2 = ref2_default;
137
      }
138
      param1.ref1_addrs[i] = bo1->offset;
139
      param1.ref2_addrs[i] = bo2->offset;
140
      struct nouveau_pushbuf_refn bo_refs[] = {
141
         { bo1, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
142
         { bo2, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
143
      };
144
      nouveau_pushbuf_refn(push, bo_refs, ARRAY_SIZE(bo_refs));
145
   }
146

147
   memcpy(dec->vp_params->map, &param1, sizeof(param1));
148
   memcpy(dec->vp_params->map + 0x400, &param2, sizeof(param2));
149

150
   nouveau_pushbuf_refn(push, bo_refs, num_refs);
151

152
   /* Wait for BSP to have completed */
153
   BEGIN_NV04(push, SUBC_VP(0x10), 4);
154
   PUSH_DATAh(push, dec->fence->offset);
155
   PUSH_DATA (push, dec->fence->offset);
156
   PUSH_DATA (push, 2);
157
   PUSH_DATA (push, 1); /* wait for sem == 2 */
158

159
   /* VP step 1 */
160
   BEGIN_NV04(push, SUBC_VP(0x400), 15);
161
   PUSH_DATA (push, 1);
162
   PUSH_DATA (push, param2.mbs);
163
   PUSH_DATA (push, 0x3987654); /* each nibble probably a dma index */
164
   PUSH_DATA (push, 0x55001); /* constant */
165
   PUSH_DATA (push, dec->vp_params->offset >> 8);
166
   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_residual) >> 8);
167
   PUSH_DATA (push, dec->vpring_ctrl);
168
   PUSH_DATA (push, dec->vpring->offset >> 8);
169
   PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
170
   PUSH_DATA (push, (dec->mbring->offset + dec->mbring->size - 0x2000) >> 8);
171
   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
172
                     dec->vpring_residual + dec->vpring_deblock) >> 8);
173
   PUSH_DATA (push, 0);
174
   PUSH_DATA (push, 0x100008);
175
   PUSH_DATA (push, dest->interlaced->offset >> 8);
176
   PUSH_DATA (push, 0);
177

178
   BEGIN_NV04(push, SUBC_VP(0x620), 2);
179
   PUSH_DATA (push, 0);
180
   PUSH_DATA (push, 0);
181

182
   BEGIN_NV04(push, SUBC_VP(0x300), 1);
183
   PUSH_DATA (push, 0);
184

185
   /* VP step 2 */
186
   BEGIN_NV04(push, SUBC_VP(0x400), 5);
187
   PUSH_DATA (push, 0x54530201);
188
   PUSH_DATA (push, (dec->vp_params->offset >> 8) + 0x4);
189
   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
190
                     dec->vpring_residual) >> 8);
191
   PUSH_DATA (push, dest->interlaced->offset >> 8);
192
   PUSH_DATA (push, dest->interlaced->offset >> 8);
193

194
   if (is_ref) {
195
      BEGIN_NV04(push, SUBC_VP(0x414), 1);
196
      PUSH_DATA (push, dest->full->offset >> 8);
197
   }
198

199
   BEGIN_NV04(push, SUBC_VP(0x620), 2);
200
   PUSH_DATAh(push, dec->vp_fw2_offset);
201
   PUSH_DATA (push, dec->vp_fw2_offset);
202

203
   BEGIN_NV04(push, SUBC_VP(0x300), 1);
204
   PUSH_DATA (push, 0);
205

206
   /* Set the semaphore back to 1 */
207
   BEGIN_NV04(push, SUBC_VP(0x610), 3);
208
   PUSH_DATAh(push, dec->fence->offset);
209
   PUSH_DATA (push, dec->fence->offset);
210
   PUSH_DATA (push, 1);
211

212
   /* Write to the semaphore location, intr */
213
   BEGIN_NV04(push, SUBC_VP(0x304), 1);
214
   PUSH_DATA (push, 0x101);
215

216
   for (i = 0; i < 2; i++) {
217
      struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
218
      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
219
   }
220

221
   PUSH_KICK (push);
222
}
223

224
/*
 * Inverse-quantize one DCT coefficient and saturate it to 12 bits.
 *
 * val:   quantized coefficient.
 * quant: quantization matrix entry for this coefficient.
 * mpeg1: non-zero to force non-zero results to an odd value, moving even
 *        values one step toward zero.
 *
 * Returns val * quant / 16, optionally made odd, clamped to [-2048, 2047].
 */
static inline int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
   /* Keep the intermediate in int: val * quant / 16 can exceed the int16_t
    * range, and narrowing before the clamp would wrap instead of saturate. */
   int ret = val * quant / 16;
   if (mpeg1 && ret) {
      if (ret > 0)
         ret = (ret - 1) | 1;
      else
         ret = (ret + 1) | 1;
   }
   if (ret < -2048)
      ret = -2048;
   else if (ret > 2047)
      ret = 2047;
   return (int16_t)ret;
}
238

239
/*
 * Per-macroblock record for MPEG-1/2 decoding.
 *
 * nv84_decoder_vp_mpeg12_mb() fills one of these per macroblock and appends
 * it at dec->mpeg12_mb_info (plus a second one describing a run of skipped
 * macroblocks when there is one). The layout is fixed: the function asserts
 * sizeof(struct mpeg12_mb_info) == 32.
 */
struct mpeg12_mb_info {
   uint32_t index; /* macroblock address: y * width-in-macroblocks + x */
   uint8_t unk4; /* motion forward/backward type bits, | 1 if intra, | 0x20 if dct_type */
   uint8_t unk5; /* (motion_vertical_field_select << 4) | (macroblock_modes.value & 0xf) */
   uint16_t coded_block_pattern;
   uint8_t block_counts[6]; /* number of coefficient entries emitted per block */
   uint16_t PMV[8]; /* copied from the macroblock's PMV only when it has motion */
   uint16_t skipped; /* in the follow-up record: num_skipped_macroblocks - 1 */
};
248

249
void
250
nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec,
251
                          struct pipe_mpeg12_picture_desc *desc,
252
                          const struct pipe_mpeg12_macroblock *macrob)
253
{
254
   STATIC_ASSERT(sizeof(struct mpeg12_mb_info) == 32);
255

256
   struct mpeg12_mb_info info = {0};
257
   int i, sum = 0, mask, block_index, count;
258
   const int16_t *blocks;
259
   int intra = macrob->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA;
260
   int motion = macrob->macroblock_type &
261
      (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD);
262
   const uint8_t *quant_matrix = intra ? dec->mpeg12_intra_matrix :
263
      dec->mpeg12_non_intra_matrix;
264
   int mpeg1 = dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1;
265

266
   info.index = macrob->y * mb(dec->base.width) + macrob->x;
267
   info.unk4 = motion;
268
   if (intra)
269
      info.unk4 |= 1;
270
   if (macrob->macroblock_modes.bits.dct_type)
271
      info.unk4 |= 0x20;
272
   info.unk5 = (macrob->motion_vertical_field_select << 4) |
273
      (macrob->macroblock_modes.value & 0xf);
274
   info.coded_block_pattern = macrob->coded_block_pattern;
275
   if (motion) {
276
      memcpy(info.PMV, macrob->PMV, sizeof(info.PMV));
277
   }
278
   blocks = macrob->blocks;
279
   for (mask = 0x20, block_index = 0; mask > 0; mask >>= 1, block_index++) {
280
      if ((macrob->coded_block_pattern & mask) == 0)
281
         continue;
282

283
      count = 0;
284

285
      /*
286
       * The observation here is that there are a lot of 0's, and things go
287
       * a lot faster if one skips over them.
288
       */
289

290
#if defined(PIPE_ARCH_SSE) && defined(PIPE_ARCH_X86_64)
291
/* Note that the SSE implementation is much more tuned to X86_64. As it's not
292
 * benchmarked on X86_32, disable it there. I suspect that the code needs to
293
 * be reorganized in terms of 32-bit wide data in order to be more
294
 * efficient. NV84+ were released well into the 64-bit CPU era, so it should
295
 * be a minority case.
296
 */
297

298
/* This returns a 16-bit bit-mask, each 2 bits are both 1 or both 0, depending
299
 * on whether the corresponding (16-bit) word in blocks is zero or non-zero. */
300
#define wordmask(blocks, zero) \
301
      (uint64_t)(_mm_movemask_epi8( \
302
                       _mm_cmpeq_epi16( \
303
                             zero, _mm_load_si128((__m128i *)(blocks)))))
304

305
      __m128i zero = _mm_setzero_si128();
306

307
      /* TODO: Look into doing the inverse quantization in terms of SSE
308
       * operations unconditionally, when necessary. */
309
      uint64_t bmask0 = wordmask(blocks, zero);
310
      bmask0 |= wordmask(blocks + 8, zero) << 16;
311
      bmask0 |= wordmask(blocks + 16, zero) << 32;
312
      bmask0 |= wordmask(blocks + 24, zero) << 48;
313
      uint64_t bmask1 = wordmask(blocks + 32, zero);
314
      bmask1 |= wordmask(blocks + 40, zero) << 16;
315
      bmask1 |= wordmask(blocks + 48, zero) << 32;
316
      bmask1 |= wordmask(blocks + 56, zero) << 48;
317

318
      /* The wordmask macro returns the inverse of what we want, since it
319
       * returns a 1 for equal-to-zero. Invert. */
320
      bmask0 = ~bmask0;
321
      bmask1 = ~bmask1;
322

323
      /* Note that the bitmask is actually sequences of 2 bits for each block
324
       * index. This is because there is no movemask_epi16. That means that
325
       * (a) ffs will never return 64, since the prev bit will always be set
326
       * in that case, and (b) we need to do an extra bit shift. Or'ing the
327
       * bitmasks together is faster than having a loop that computes them one
328
       * at a time and processes them, on a Core i7-920. Trying to put bmask
329
       * into an array and then looping also slows things down.
330
       */
331

332
      /* shift needs to be the same width as i, and unsigned so that / 2
333
       * becomes a rshift operation */
334
      uint32_t shift;
335
      i = 0;
336

337
      if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
338
         int16_t tmp;
339
         while ((shift = __builtin_ffsll(bmask0))) {
340
            i += (shift - 1) / 2;
341
            bmask0 >>= shift - 1;
342
            *dec->mpeg12_data++ = dec->zscan[i] * 2;
343
            tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
344
            *dec->mpeg12_data++ = tmp;
345
            sum += tmp;
346
            count++;
347
            i++;
348
            bmask0 >>= 2;
349
         }
350
         i = 32;
351
         while ((shift = __builtin_ffsll(bmask1))) {
352
            i += (shift - 1) / 2;
353
            bmask1 >>= shift - 1;
354
            *dec->mpeg12_data++ = dec->zscan[i] * 2;
355
            tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
356
            *dec->mpeg12_data++ = tmp;
357
            sum += tmp;
358
            count++;
359
            i++;
360
            bmask1 >>= 2;
361
         }
362
      } else {
363
         while ((shift = __builtin_ffsll(bmask0))) {
364
            i += (shift - 1) / 2;
365
            bmask0 >>= shift - 1;
366
            *dec->mpeg12_data++ = i * 2;
367
            *dec->mpeg12_data++ = blocks[i];
368
            count++;
369
            i++;
370
            bmask0 >>= 2;
371
         }
372
         i = 32;
373
         while ((shift = __builtin_ffsll(bmask1))) {
374
            i += (shift - 1) / 2;
375
            bmask1 >>= shift - 1;
376
            *dec->mpeg12_data++ = i * 2;
377
            *dec->mpeg12_data++ = blocks[i];
378
            count++;
379
            i++;
380
            bmask1 >>= 2;
381
         }
382
      }
383
#undef wordmask
384
#else
385

386
      /*
387
       * This loop looks ridiculously written... and it is. I tried a lot of
388
       * different ways of achieving this scan, and this was the fastest, at
389
       * least on a Core i7-920. Note that it's not necessary to skip the 0's,
390
       * the firmware will deal with those just fine. But it's faster to skip
391
       * them. Note to people trying benchmarks: make sure to use realistic
392
       * mpeg data, which can often be a single data point first followed by
393
       * 63 0's, or <data> 7x <0> <data> 7x <0> etc.
394
       */
395
      i = 0;
396
      if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
397
         while (true) {
398
            int16_t tmp;
399
            while (likely(i < 64 && !(tmp = blocks[i]))) i++;
400
            if (i >= 64) break;
401
            *dec->mpeg12_data++ = dec->zscan[i] * 2;
402
            tmp = inverse_quantize(tmp, quant_matrix[i], mpeg1);
403
            *dec->mpeg12_data++ = tmp;
404
            sum += tmp;
405
            count++;
406
            i++;
407
         }
408
      } else {
409
         while (true) {
410
            int16_t tmp;
411
            while (likely(i < 64 && !(tmp = blocks[i]))) i++;
412
            if (i >= 64) break;
413
            *dec->mpeg12_data++ = i * 2;
414
            *dec->mpeg12_data++ = tmp;
415
            count++;
416
            i++;
417
         }
418
      }
419

420
#endif
421

422
      if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
423
         if (!mpeg1 && (sum & 1) == 0) {
424
            if (count && *(dec->mpeg12_data - 2) == 63 * 2) {
425
               uint16_t *val = dec->mpeg12_data - 1;
426
               if (*val & 1) *val -= 1;
427
               else *val += 1;
428
            } else {
429
               *dec->mpeg12_data++ = 63 * 2;
430
               *dec->mpeg12_data++ = 1;
431
               count++;
432
            }
433
         }
434
      }
435

436
      if (count) {
437
         *(dec->mpeg12_data - 2) |= 1;
438
      } else {
439
         *dec->mpeg12_data++ = 1;
440
         *dec->mpeg12_data++ = 0;
441
         count = 1;
442
      }
443
      info.block_counts[block_index] = count;
444
      blocks += 64;
445
   }
446

447
   memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
448
   dec->mpeg12_mb_info += sizeof(info);
449

450
   if (macrob->num_skipped_macroblocks) {
451
      info.index++;
452
      info.coded_block_pattern = 0;
453
      info.skipped = macrob->num_skipped_macroblocks - 1;
454
      memset(info.block_counts, 0, sizeof(info.block_counts));
455
      memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
456
      dec->mpeg12_mb_info += sizeof(info);
457
   }
458
}
459

460
/*
 * Header placed at the start of the MPEG-1/2 staging buffer.
 *
 * nv84_decoder_vp_mpeg12() zero-initializes one of these, fills in the
 * fields below and copies it to dec->mpeg12_bo->map. The layout is fixed:
 * the function asserts sizeof(struct mpeg12_header) == 0x100, and the
 * trailing comments give each field's byte offset.
 */
struct mpeg12_header {
   uint32_t luma_top_size; // 00 -- layer_stride of the luma miptree
   uint32_t luma_bottom_size; // 04 -- layer_stride of the luma miptree
   uint32_t chroma_top_size; // 08 -- layer_stride of the chroma miptree
   uint32_t mbs; // 0c -- macroblocks per picture
   uint32_t mb_info_size; // 10 -- bytes of mb info written after the header
   uint32_t mb_width_minus1; // 14
   uint32_t mb_height_minus1; // 18
   uint32_t width; // 1c -- aligned to 16
   uint32_t height; // 20 -- aligned to 16
   uint8_t progressive; // 24 -- desc->frame_pred_frame_dct
   uint8_t mocomp_only; // 25 -- never written, stays zero
   uint8_t frames; // 26 -- 1 + number of non-NULL references
   uint8_t picture_structure; // 27
   uint32_t unk28; // 28 -- 0x50100
   uint32_t unk2c; // 2c
   uint32_t pad[4 * 13]; // 30 -- pads the header to 0x100 bytes
};
478

479
void
480
nv84_decoder_vp_mpeg12(struct nv84_decoder *dec,
481
                       struct pipe_mpeg12_picture_desc *desc,
482
                       struct nv84_video_buffer *dest)
483
{
484
   struct nouveau_pushbuf *push = dec->vp_pushbuf;
485
   struct nv84_video_buffer *ref1 = (struct nv84_video_buffer *)desc->ref[0];
486
   struct nv84_video_buffer *ref2 = (struct nv84_video_buffer *)desc->ref[1];
487
   struct nouveau_pushbuf_refn bo_refs[] = {
488
      { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
489
      { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
490
      { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
491
      { dec->mpeg12_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
492
   };
493
   int i, num_refs = ARRAY_SIZE(bo_refs);
494
   struct mpeg12_header header = {0};
495
   struct nv50_miptree *y = nv50_miptree(dest->resources[0]);
496
   struct nv50_miptree *uv = nv50_miptree(dest->resources[1]);
497

498
   STATIC_ASSERT(sizeof(struct mpeg12_header) == 0x100);
499

500
   if (!ref1)
501
      ref1 = dest;
502
   if (!ref2)
503
      ref2 = dest;
504
   bo_refs[1].bo = ref1->interlaced;
505
   bo_refs[2].bo = ref2->interlaced;
506

507
   header.luma_top_size = y->layer_stride;
508
   header.luma_bottom_size = y->layer_stride;
509
   header.chroma_top_size = uv->layer_stride;
510
   header.mbs = mb(dec->base.width) * mb(dec->base.height);
511
   header.mb_info_size = dec->mpeg12_mb_info - dec->mpeg12_bo->map - 0x100;
512
   header.mb_width_minus1 = mb(dec->base.width) - 1;
513
   header.mb_height_minus1 = mb(dec->base.height) - 1;
514
   header.width = align(dec->base.width, 16);
515
   header.height = align(dec->base.height, 16);
516
   header.progressive = desc->frame_pred_frame_dct;
517
   header.frames = 1 + (desc->ref[0] != NULL) + (desc->ref[1] != NULL);
518
   header.picture_structure = desc->picture_structure;
519
   header.unk28 = 0x50100;
520

521
   memcpy(dec->mpeg12_bo->map, &header, sizeof(header));
522

523
   PUSH_SPACE(push, 10 + 3 + 2);
524

525
   nouveau_pushbuf_refn(push, bo_refs, num_refs);
526

527
   BEGIN_NV04(push, SUBC_VP(0x400), 9);
528
   PUSH_DATA (push, 0x543210); /* each nibble possibly a dma index */
529
   PUSH_DATA (push, 0x555001); /* constant */
530
   PUSH_DATA (push, dec->mpeg12_bo->offset >> 8);
531
   PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100) >> 8);
532
   PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100 +
533
                     align(0x20 * mb(dec->base.width) *
534
                           mb(dec->base.height), 0x100)) >> 8);
535
   PUSH_DATA (push, dest->interlaced->offset >> 8);
536
   PUSH_DATA (push, ref1->interlaced->offset >> 8);
537
   PUSH_DATA (push, ref2->interlaced->offset >> 8);
538
   PUSH_DATA (push, 6 * 64 * 8 * header.mbs);
539

540
   BEGIN_NV04(push, SUBC_VP(0x620), 2);
541
   PUSH_DATA (push, 0);
542
   PUSH_DATA (push, 0);
543

544
   BEGIN_NV04(push, SUBC_VP(0x300), 1);
545
   PUSH_DATA (push, 0);
546

547
   for (i = 0; i < 2; i++) {
548
      struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
549
      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
550
   }
551
   PUSH_KICK (push);
552
}
553

554
Product

Resources

Company