GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/vl/vl_idct.c
/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <assert.h>

#include "pipe/p_context.h"
#include "pipe/p_screen.h"

#include "util/u_draw.h"
#include "util/u_sampler.h"
#include "util/u_memory.h"

#include "tgsi/tgsi_ureg.h"

#include "vl_defines.h"
#include "vl_types.h"
#include "vl_vertex_buffers.h"
#include "vl_idct.h"

enum VS_OUTPUT
{
   VS_O_VPOS = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1,
   VS_O_R_ADDR0,
   VS_O_R_ADDR1
};

/**
 * The DCT matrix stored as hex representation of floats. Equal to the following equation:
 * for (i = 0; i < 8; ++i)
 *    for (j = 0; j < 8; ++j)
 *       if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *       else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};

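/*
 * A minimal, self-contained sketch for regenerating the table from the
 * formula in the comment above (assuming IEEE-754 binary32 floats, which is
 * what the hex values encode; the last hex digit may differ slightly
 * depending on the libm used):
 *
 *    #include <math.h>
 *    #include <stdint.h>
 *    #include <stdio.h>
 *    #include <string.h>
 *
 *    int main(void)
 *    {
 *       for (int i = 0; i < 8; ++i) {
 *          for (int j = 0; j < 8; ++j) {
 *             float v = (i == 0) ? 1.0f / sqrtf(8.0f) :
 *                sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * (float)M_PI / (2.0f * 8.0f));
 *             uint32_t bits;
 *             memcpy(&bits, &v, sizeof(bits));
 *             printf("0x%08x%s", bits, j == 7 ? "\n" : ", ");
 *          }
 *       }
 *       return 0;
 *    }
 */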
static void
calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
          struct ureg_src tc, struct ureg_src start, bool right_side,
          bool transposed, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;

   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;

   /*
    * addr[0..1].(start) = right_side ? start.x : tc.x
    * addr[0..1].(tc) = right_side ? tc.y : start.y
    * addr[0..1].z = tc.z
    * addr[1].(start) += 1.0f / scale
    */
   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));

   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
}

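/*
 * For the common case right_side == false, transposed == false the code
 * above reduces to (a plain-C sketch of the emitted TGSI, component names
 * only for illustration):
 *
 *    addr[0].x = start.x;                // first texel of the row
 *    addr[0].y = tc.y;
 *    addr[1].x = start.x + 1.0f / size;  // the horizontally adjacent texel
 *    addr[1].y = tc.y;
 *
 * The two addresses are one texel apart, so a later fetch_four() reads two
 * neighbouring RGBA texels, i.e. 8 consecutive values.
 */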
static void
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
               struct ureg_src saddr[2], bool right_side, bool transposed,
               int pos, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;

   /*
    * daddr[0..1].(start) = saddr[0..1].(start)
    * daddr[0..1].(tc) = saddr[0..1].(tc) + pos / size
    */

   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
}

static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
           struct ureg_src sampler, bool resource3d)
{
   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
}

static void
matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
{
   struct ureg_dst tmp;

   tmp = ureg_DECL_temporary(shader);

   /*
    * tmp.x = dot4(l[0], r[0])
    * tmp.y = dot4(l[1], r[1])
    * dst = tmp.x + tmp.y
    */
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
   ureg_ADD(shader, dst,
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));

   ureg_release_temporary(shader, tmp);
}

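/*
 * In plain C this is simply an 8-wide dot product split over two vec4
 * instructions; a sketch, with l and r each viewed as two float[4] halves:
 *
 *    static float dot8(const float l[2][4], const float r[2][4])
 *    {
 *       float sum = 0.0f;
 *       for (int h = 0; h < 2; ++h)
 *          for (int k = 0; k < 4; ++k)
 *             sum += l[h][k] * r[h][k];
 *       return sum;
 *    }
 */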
static void *
create_mismatch_vert_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src vpos;
   struct ureg_src scale;
   struct ureg_dst t_tex;
   struct ureg_dst o_vpos, o_addr[2];

   shader = ureg_create(PIPE_SHADER_VERTEX);
   if (!shader)
      return NULL;

   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_tex = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   /*
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
    * o_vpos.xy = t_vpos * scale
    *
    * o_addr = calc_addr(...)
    *
    */

   scale = ureg_imm2f(shader,
                      (float)VL_BLOCK_WIDTH / idct->buffer_width,
                      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);

   ureg_release_temporary(shader, t_tex);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

static void *
create_mismatch_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src addr[2];

   struct ureg_dst m[8][2];
   struct ureg_dst fragment;

   unsigned i;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader)
      return NULL;

   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   for (i = 0; i < 8; ++i) {
      m[i][0] = ureg_DECL_temporary(shader);
      m[i][1] = ureg_DECL_temporary(shader);
   }

   for (i = 0; i < 8; ++i) {
      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
   }

   for (i = 0; i < 8; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = ureg_src(m[i][0]);
      s_addr[1] = ureg_src(m[i][1]);
      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
   }

   for (i = 1; i < 8; ++i) {
      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
   }

   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));

   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));

   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));

   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));

   for (i = 0; i < 8; ++i) {
      ureg_release_temporary(shader, m[i][0]);
      ureg_release_temporary(shader, m[i][1]);
   }

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

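/*
 * The pass above is the IDCT mismatch control known from MPEG-2 style
 * decoders: the parity of the sum of all 64 coefficients decides whether the
 * last coefficient is nudged by one least significant bit. With coefficients
 * stored as value / (1 << 15), an integer sketch of the same idea (an
 * illustration only; the shader works on the normalized floats directly):
 *
 *    int sum = 0;
 *    for (int k = 0; k < 64; ++k)
 *       sum += coeff[k];
 *    if ((sum & 1) == 0)
 *       coeff[63] += (coeff[63] & 1) ? -1 : 1;
 */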
static void *
create_stage1_vert_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos;
   struct ureg_src scale;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];

   shader = ureg_create(PIPE_SHADER_VERTEX);
   if (!shader)
      return NULL;

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + vrect
    * o_vpos.xy = t_vpos * scale
    * o_vpos.zw = vpos
    *
    * o_l_addr = calc_addr(...)
    * o_r_addr = calc_addr(...)
    *
    */

   scale = ureg_imm2f(shader,
                      (float)VL_BLOCK_WIDTH / idct->buffer_width,
                      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

static void *
create_stage1_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_src l_addr[2], r_addr[2];
   struct ureg_dst l[4][2], r[2];
   struct ureg_dst *fragment;
   unsigned i;
   int j;

   shader = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!shader)
      return NULL;

   fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < idct->nr_of_render_targets; ++i)
      fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   for (i = 0; i < 4; ++i) {
      l[i][0] = ureg_DECL_temporary(shader);
      l[i][1] = ureg_DECL_temporary(shader);
   }

   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   for (i = 0; i < 4; ++i) {
      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
   }

   for (i = 0; i < 4; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = ureg_src(l[i][0]);
      s_addr[1] = ureg_src(l[i][1]);
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
   }

   for (i = 0; i < idct->nr_of_render_targets; ++i) {
      struct ureg_src s_addr[2];

      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);

      s_addr[0] = ureg_src(r[0]);
      s_addr[1] = ureg_src(r[1]);
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);

      for (j = 0; j < 4; ++j) {
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
      }
   }

   for (i = 0; i < 4; ++i) {
      ureg_release_temporary(shader, l[i][0]);
      ureg_release_temporary(shader, l[i][1]);
   }
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   FREE(fragment);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

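/*
 * Result layout of the loops above (a sketch, names are illustrative only):
 * color output i, channel j receives matrix_mul(l[j], r) for the i-th step of
 * the right-hand address, roughly
 *
 *    rt[i].chan[j] = dot8(left_half_rows[j], right_half_step[i]);
 *
 * so the four channels of each of the nr_of_render_targets outputs together
 * hold the intermediate result of the first of the two matrix
 * multiplications that make up the 8x8 IDCT.
 */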
void
vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
                           unsigned first_output, struct ureg_dst tex)
{
   struct ureg_src vrect, vpos;
   struct ureg_src scale;
   struct ureg_dst t_start;
   struct ureg_dst o_l_addr[2], o_r_addr[2];

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   t_start = ureg_DECL_temporary(shader);

   --first_output;

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);

   scale = ureg_imm2f(shader,
                      (float)VL_BLOCK_WIDTH / idct->buffer_width,
                      (float)VL_BLOCK_HEIGHT / idct->buffer_height);

   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
            ureg_scalar(vrect, TGSI_SWIZZLE_X),
            ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);

   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);

   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
}

void
vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
                           unsigned first_input, struct ureg_dst fragment)
{
   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[2], r[2];

   --first_input;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   l[0] = ureg_DECL_temporary(shader);
   l[1] = ureg_DECL_temporary(shader);
   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);

   matrix_mul(shader, fragment, l, r);

   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);
}

static bool
init_shaders(struct vl_idct *idct)
{
   idct->vs_mismatch = create_mismatch_vert_shader(idct);
   if (!idct->vs_mismatch)
      goto error_vs_mismatch;

   idct->fs_mismatch = create_mismatch_frag_shader(idct);
   if (!idct->fs_mismatch)
      goto error_fs_mismatch;

   idct->vs = create_stage1_vert_shader(idct);
   if (!idct->vs)
      goto error_vs;

   idct->fs = create_stage1_frag_shader(idct);
   if (!idct->fs)
      goto error_fs;

   return true;

error_fs:
   idct->pipe->delete_vs_state(idct->pipe, idct->vs);

error_vs:
   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);

error_fs_mismatch:
   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);

error_vs_mismatch:
   return false;
}

static void
cleanup_shaders(struct vl_idct *idct)
{
   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
}

static bool
init_state(struct vl_idct *idct)
{
   struct pipe_blend_state blend;
   struct pipe_rasterizer_state rs_state;
   struct pipe_sampler_state sampler;
   unsigned i;

   assert(idct);

   memset(&rs_state, 0, sizeof(rs_state));
   rs_state.point_size = 1;
   rs_state.half_pixel_center = true;
   rs_state.bottom_edge_rule = true;
   rs_state.depth_clip_near = 1;
   rs_state.depth_clip_far = 1;

   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
   if (!idct->rs_state)
      goto error_rs_state;

   memset(&blend, 0, sizeof blend);

   blend.independent_blend_enable = 0;
   blend.rt[0].blend_enable = 0;
   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
   blend.logicop_enable = 0;
   blend.logicop_func = PIPE_LOGICOP_CLEAR;
   /* Needed to allow color writes to FB, even if blending disabled */
   blend.rt[0].colormask = PIPE_MASK_RGBA;
   blend.dither = 0;
   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
   if (!idct->blend)
      goto error_blend;

   for (i = 0; i < 2; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
      if (!idct->samplers[i])
         goto error_samplers;
   }

   return true;

error_samplers:
   for (i = 0; i < 2; ++i)
      if (idct->samplers[i])
         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);

   idct->pipe->delete_blend_state(idct->pipe, idct->blend);

error_blend:
   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);

error_rs_state:
   return false;
}

static void
cleanup_state(struct vl_idct *idct)
{
   unsigned i;

   for (i = 0; i < 2; ++i)
      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);

   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
}

static bool
init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource *tex;
   struct pipe_surface surf_templ;

   assert(idct && buffer);

   tex = buffer->sampler_views.individual.source->texture;

   buffer->fb_state_mismatch.width = tex->width0;
   buffer->fb_state_mismatch.height = tex->height0;
   buffer->fb_state_mismatch.nr_cbufs = 1;

   memset(&surf_templ, 0, sizeof(surf_templ));
   surf_templ.format = tex->format;
   surf_templ.u.tex.first_layer = 0;
   surf_templ.u.tex.last_layer = 0;
   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);

   buffer->viewport_mismatch.scale[0] = tex->width0;
   buffer->viewport_mismatch.scale[1] = tex->height0;
   buffer->viewport_mismatch.scale[2] = 1;
   buffer->viewport_mismatch.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
   buffer->viewport_mismatch.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
   buffer->viewport_mismatch.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
   buffer->viewport_mismatch.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;

   return true;
}

static void
cleanup_source(struct vl_idct_buffer *buffer)
{
   assert(buffer);

   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
}

static bool
init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource *tex;
   struct pipe_surface surf_templ;
   unsigned i;

   assert(idct && buffer);

   tex = buffer->sampler_views.individual.intermediate->texture;

   buffer->fb_state.width = tex->width0;
   buffer->fb_state.height = tex->height0;
   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
   for(i = 0; i < idct->nr_of_render_targets; ++i) {
      memset(&surf_templ, 0, sizeof(surf_templ));
      surf_templ.format = tex->format;
      surf_templ.u.tex.first_layer = i;
      surf_templ.u.tex.last_layer = i;
      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
         idct->pipe, tex, &surf_templ);

      if (!buffer->fb_state.cbufs[i])
         goto error_surfaces;
   }

   buffer->viewport.scale[0] = tex->width0;
   buffer->viewport.scale[1] = tex->height0;
   buffer->viewport.scale[2] = 1;
   buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
   buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
   buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
   buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;

   return true;

error_surfaces:
   for(i = 0; i < idct->nr_of_render_targets; ++i)
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);

   return false;
}

static void
cleanup_intermediate(struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(buffer);

   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
}

struct pipe_sampler_view *
vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
{
   struct pipe_resource tex_templ, *matrix;
   struct pipe_sampler_view sv_templ, *sv;
   struct pipe_transfer *buf_transfer;
   unsigned i, j, pitch;
   float *f;

   struct pipe_box rect =
   {
      0, 0, 0,
      VL_BLOCK_WIDTH / 4,
      VL_BLOCK_HEIGHT,
      1
   };

   assert(pipe);

   memset(&tex_templ, 0, sizeof(tex_templ));
   tex_templ.target = PIPE_TEXTURE_2D;
   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   tex_templ.last_level = 0;
   tex_templ.width0 = 2;
   tex_templ.height0 = 8;
   tex_templ.depth0 = 1;
   tex_templ.array_size = 1;
   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
   tex_templ.flags = 0;

   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
   if (!matrix)
      goto error_matrix;

   f = pipe->texture_map(pipe, matrix, 0,
                         PIPE_MAP_WRITE |
                         PIPE_MAP_DISCARD_RANGE,
                         &rect, &buf_transfer);
   if (!f)
      goto error_map;

   pitch = buf_transfer->stride / sizeof(float);

   for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
      for(j = 0; j < VL_BLOCK_WIDTH; ++j)
         // transpose and scale
         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;

   pipe->texture_unmap(pipe, buf_transfer);

   memset(&sv_templ, 0, sizeof(sv_templ));
   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
   pipe_resource_reference(&matrix, NULL);
   if (!sv)
      goto error_map;

   return sv;

error_map:
   pipe_resource_reference(&matrix, NULL);

error_matrix:
   return NULL;
}

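/*
 * Layout note: each R32G32B32A32_FLOAT texel packs four floats, so the
 * transposed 8x8 matrix fits into a 2 x 8 texture (VL_BLOCK_WIDTH / 4 texels
 * per row). A sketch of the mapping performed by the loop above:
 *
 *    texel(x, y).rgba = { m[4*x+0][y], m[4*x+1][y], m[4*x+2][y], m[4*x+3][y] } * scale
 *
 * where m is const_matrix interpreted as floats.
 */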
bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                  unsigned buffer_width, unsigned buffer_height,
                  unsigned nr_of_render_targets,
                  struct pipe_sampler_view *matrix,
                  struct pipe_sampler_view *transpose)
{
   assert(idct && pipe);
   assert(matrix && transpose);

   idct->pipe = pipe;
   idct->buffer_width = buffer_width;
   idct->buffer_height = buffer_height;
   idct->nr_of_render_targets = nr_of_render_targets;

   pipe_sampler_view_reference(&idct->matrix, matrix);
   pipe_sampler_view_reference(&idct->transpose, transpose);

   if(!init_shaders(idct))
      return false;

   if(!init_state(idct)) {
      cleanup_shaders(idct);
      return false;
   }

   return true;
}

void
vl_idct_cleanup(struct vl_idct *idct)
{
   cleanup_shaders(idct);
   cleanup_state(idct);

   pipe_sampler_view_reference(&idct->matrix, NULL);
   pipe_sampler_view_reference(&idct->transpose, NULL);
}

bool
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                    struct pipe_sampler_view *source,
                    struct pipe_sampler_view *intermediate)
{
   assert(buffer && idct);
   assert(source && intermediate);

   memset(buffer, 0, sizeof(struct vl_idct_buffer));

   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);

   if (!init_source(idct, buffer))
      return false;

   if (!init_intermediate(idct, buffer))
      return false;

   return true;
}

void
vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
{
   assert(buffer);

   cleanup_source(buffer);
   cleanup_intermediate(buffer);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
}

void
vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
{
   assert(buffer);

   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
   idct->pipe->bind_blend_state(idct->pipe, idct->blend);

   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
                                   0, 2, idct->samplers);

   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0,
                                 buffer->sampler_views.stage[0]);

   /* mismatch control */
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);

   /* first stage */
   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
}

void
vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   assert(buffer);

   /* second stage */
   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
                                   0, 2, idct->samplers);
   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT,
                                 0, 2, 0, buffer->sampler_views.stage[1]);
}

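/*
 * Typical call sequence for a user of this module (a sketch based only on
 * the functions above; creation of the pipe context, the sampler views and
 * the vertex buffers is omitted, and the variable names are illustrative):
 *
 *    struct vl_idct idct;
 *    struct vl_idct_buffer buf;
 *    struct pipe_sampler_view *matrix, *transpose;
 *
 *    matrix = vl_idct_upload_matrix(pipe, matrix_scale);
 *    transpose = vl_idct_upload_matrix(pipe, transpose_scale);
 *    vl_idct_init(&idct, pipe, width, height, nr_of_render_targets, matrix, transpose);
 *    vl_idct_init_buffer(&idct, &buf, source_view, intermediate_view);
 *
 *    // per batch of blocks:
 *    vl_idct_flush(&idct, &buf, num_instances);   // mismatch pass + first transform stage
 *    vl_idct_prepare_stage2(&idct, &buf);         // caller then issues the second-stage draw
 *
 *    vl_idct_cleanup_buffer(&buf);
 *    vl_idct_cleanup(&idct);
 */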