CoCalc -- sp_tex

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/softpipe/sp_tex_sample.c
⁴⁵⁷⁰ views
1
/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 * Copyright 2008-2010 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
28

29
/**
 * Texture sampling
 *
 * Authors:
 *   Brian Paul
 *   Keith Whitwell
 */
36

37
#include "pipe/p_context.h"
38
#include "pipe/p_defines.h"
39
#include "pipe/p_shader_tokens.h"
40
#include "util/u_math.h"
41
#include "util/format/u_format.h"
42
#include "util/u_memory.h"
43
#include "util/u_inlines.h"
44
#include "sp_quad.h"   /* only for #define QUAD_* tokens */
45
#include "sp_tex_sample.h"
46
#include "sp_texture.h"
47
#include "sp_tex_tile_cache.h"
48

49

50
/** Set to one to help debug texture sampling */
#define DEBUG_TEX 0
52

53

54
/*
55
 * Return fractional part of 'f'.  Used for computing interpolation weights.
56
 * Need to be careful with negative values.
57
 * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
58
 * of improperly weighted linear-filtered textures.
59
 * The tests/texwrap.c demo is a good test.
60
 */
61
static inline float
62
frac(float f)
63
{
64
   return f - floorf(f);
65
}
66

67

68

69
/**
 * Linear interpolation between two values.
 *
 * \param a   interpolation weight; 0 yields v0, 1 yields v1
 * \param v0  value returned for a == 0
 * \param v1  value returned for a == 1
 * \return v0 + a * (v1 - v0)
 */
static inline float
lerp(float a, float v0, float v1)
{
   return v0 + a * (v1 - v0);
}
77

78

79
/**
 * Do 2D/bilinear interpolation of float values.
 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
 * a and b are the horizontal and vertical interpolants.
 * It's important that this function is inlined when compiled with
 * optimization!  If we find that's not true on some systems, convert
 * to a macro.
 *
 * \param a    weight between the first and second sample of each row
 * \param b    weight between the two rows
 * \param v00  first row, first sample
 * \param v10  first row, second sample
 * \param v01  second row, first sample
 * \param v11  second row, second sample
 * \return the two rows interpolated by a, then blended by b
 */
static inline float
lerp_2d(float a, float b,
        float v00, float v10, float v01, float v11)
{
   const float temp0 = lerp(a, v00, v10);
   const float temp1 = lerp(a, v01, v11);
   return lerp(b, temp0, temp1);
}
95

96

97
/**
 * 3D (trilinear) interpolation of 8 values.
 *
 * Performs a lerp_2d() with weights (a, b) on the four v**0 samples and on
 * the four v**1 samples, then blends the two results with weight c.
 *
 * \param a, b, c  interpolants along the first, second and third axis
 * \return the interpolated value
 */
static inline float
lerp_3d(float a, float b, float c,
        float v000, float v100, float v010, float v110,
        float v001, float v101, float v011, float v111)
{
   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
   return lerp(c, temp0, temp1);
}
109

110

111

112
/**
 * Compute coord modulo size for repeat wrap modes.
 *
 * C's % operator yields a negative (or zero) remainder for a negative
 * coord, so the remainder is shifted back into [0, size-1] when needed.
 * Unlike adding a fixed multiple of size before taking the remainder,
 * this is correct for any negative coord and cannot overflow for large
 * sizes.
 *
 * \param coord  integer texel coordinate, may be negative
 * \param size   texture dimension in texels; must be non-zero
 * \return coord wrapped into [0, size-1]
 */
static inline int
repeat(int coord, unsigned size)
{
   const int isize = (int) size;
   int wrapped = coord % isize;

   if (wrapped < 0)
      wrapped += isize;
   return wrapped;
}
123

124

125
/**
 * Repeat wrap mode for nearest sampling.
 *
 * The wrap_nearest_*() functions that follow share this signature: they
 * apply one texture coord wrapping mode to a single texcoord (S or T or P)
 * and return the integer texel index.
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [0,1) */
   /* i limited to [0,size-1] */
   const int i = util_ifloor(s * size);
   *icoord = repeat(i + offset, size);
}
141

142

143
/**
 * Clamp wrap mode for nearest sampling.
 *
 * Scales s by size, adds the texel offset and clamps the resulting texel
 * index to [0, size-1].
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [0,1] */
   /* i limited to [0,size-1] */
   s *= size;
   s += offset;
   if (s <= 0.0F)
      *icoord = 0;
   else if (s >= size)
      *icoord = size - 1;
   else
      *icoord = util_ifloor(s);
}
157

158

159
/**
 * Clamp-to-edge wrap mode for nearest sampling.
 *
 * Scaled coordinates below 0.5 map to texel 0 and coordinates above
 * size - 0.5 map to texel size-1; everything in between is floored.
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [min,max] */
   /* i limited to [0, size-1] */
   const float min = 0.5F;
   const float max = (float)size - 0.5F;

   s *= size;
   s += offset;

   if (s < min)
      *icoord = 0;
   else if (s > max)
      *icoord = size - 1;
   else
      *icoord = util_ifloor(s);
}
177

178

179
/**
 * Clamp-to-border wrap mode for nearest sampling.
 *
 * Scaled coordinates at or below -0.5 yield index -1 and coordinates at or
 * above size + 0.5 yield index 'size'; these two out-of-range indices are
 * left for the caller to resolve (the get_texel_*() helpers in this file
 * return the border color for out-of-range coordinates).
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord, in [-1, size]
 */
static void
wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [min,max] */
   /* i limited to [-1, size] */
   const float min = -0.5F;
   const float max = size + 0.5F;

   s *= size;
   s += offset;
   if (s <= min)
      *icoord = -1;
   else if (s >= max)
      *icoord = size;
   else
      *icoord = util_ifloor(s);
}
196

197
/**
 * Mirrored-repeat wrap mode for nearest sampling.
 *
 * The texel offset is converted to normalized units and added to s.  The
 * fractional part of s is then mirrored (1 - u) whenever the integer part
 * of s is odd, and the result is mapped to a texel index in [0, size-1].
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
{
   /* half a texel, in normalized coordinates */
   const float min = 1.0F / (2.0F * size);
   const float max = 1.0F - min;
   int flr;
   float u;

   s += (float)offset / size;
   flr = util_ifloor(s);
   u = frac(s);
   if (flr & 1)
      u = 1.0F - u;   /* odd repetition: mirror */
   if (u < min)
      *icoord = 0;
   else if (u > max)
      *icoord = size - 1;
   else
      *icoord = util_ifloor(u * size);
}
217

218

219
/**
 * Mirror-clamp wrap mode for nearest sampling.
 *
 * Takes the absolute value of the scaled, offset coordinate (mirroring
 * negative coordinates about zero) and clamps the texel index to
 * [0, size-1].
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [0,1] */
   /* i limited to [0,size-1] */
   const float u = fabsf(s * size + offset);
   if (u <= 0.0F)
      *icoord = 0;
   else if (u >= size)
      *icoord = size - 1;
   else
      *icoord = util_ifloor(u);
}
232

233

234
/**
 * Mirror-clamp-to-edge wrap mode for nearest sampling.
 *
 * Takes the absolute value of the scaled, offset coordinate; values below
 * 0.5 map to texel 0, values above size - 0.5 map to texel size-1 and the
 * rest are floored.
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [min,max] */
   /* i limited to [0, size-1] */
   const float min = 0.5F;
   const float max = (float)size - 0.5F;
   const float u = fabsf(s * size + offset);

   if (u < min)
      *icoord = 0;
   else if (u > max)
      *icoord = size - 1;
   else
      *icoord = util_ifloor(u);
}
250

251

252
/**
 * Mirror-clamp-to-border wrap mode for nearest sampling.
 *
 * Takes the absolute value of the scaled, offset coordinate.  Values above
 * size + 0.5 yield index 'size'; everything else is floored, so the
 * result lies in [0, size].
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord  returns the integer texcoord
 */
static void
wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   /* u is an absolute value, so only the upper bound can be exceeded; a
    * lower-bound test against -0.5 could never be true and is omitted.
    */
   const float max = (float)size + 0.5F;
   const float u = fabsf(s * size + offset);

   if (u > max)
      *icoord = size;
   else
      *icoord = util_ifloor(u);
}
267

268

269
/**
 * Repeat wrap mode for linear sampling.
 *
 * The wrap_linear_*() functions that follow share this signature: they
 * compute the two texel locations and the blend weight used for linear
 * sampling along one axis.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (usually icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_repeat(float s, unsigned size, int offset,
                   int *icoord0, int *icoord1, float *w)
{
   /* shift by half a texel so that w is 0 at the centre of texel icoord0 */
   const float u = s * size - 0.5F;
   *icoord0 = repeat(util_ifloor(u) + offset, size);
   *icoord1 = repeat(*icoord0 + 1, size);
   *w = frac(u);
}
288

289

290
/**
 * Clamp wrap mode for linear sampling.
 *
 * The scaled, offset coordinate is clamped to [0, size] before the
 * half-texel shift.  The returned indices are not clamped, so icoord0 may
 * be -1 and icoord1 may be 'size'.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp(float s, unsigned size, int offset,
                  int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;

   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
300

301

302
/**
 * Clamp-to-edge wrap mode for linear sampling.
 *
 * Like wrap_linear_clamp(), but the two texel indices are additionally
 * clamped so that icoord0 >= 0 and icoord1 <= size-1.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
                          int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord0 < 0)
      *icoord0 = 0;
   if (*icoord1 >= (int) size)
      *icoord1 = size - 1;
   *w = frac(u);
}
315

316

317
/**
 * Clamp-to-border wrap mode for linear sampling.
 *
 * The scaled, offset coordinate is clamped to [-1, size + 0.5] before the
 * half-texel shift.  The returned indices are not clamped and may lie
 * outside [0, size-1].
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp_to_border(float s, unsigned size, int offset,
                            int *icoord0, int *icoord1, float *w)
{
   const float min = -1.0F;
   const float max = (float)size + 0.5F;
   const float u = CLAMP(s * size + offset, min, max) - 0.5f;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
328

329

330
/**
 * Mirrored-repeat wrap mode for linear sampling.
 *
 * The texel offset is converted to normalized units and added to s.  In
 * even repetitions (integer part of s is even) the fractional part is
 * used directly and the second texel is icoord0 + 1; in odd repetitions
 * the fractional part is mirrored and the second texel is icoord0 - 1.
 * Indices falling outside [0, size-1] are folded back onto the edge
 * texels.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_repeat(float s, unsigned size, int offset,
                          int *icoord0, int *icoord1, float *w)
{
   int flr;
   float u;
   bool no_mirror;

   s += (float)offset / size;
   flr = util_ifloor(s);
   no_mirror = !(flr & 1);

   u = frac(s);
   if (no_mirror) {
      u = u * size - 0.5F;
   } else {
      u = 1.0F - u;
      u = u * size + 0.5F;
   }

   *icoord0 = util_ifloor(u);
   *icoord1 = (no_mirror) ? *icoord0 + 1 : *icoord0 - 1;

   /* fold -1 back onto texel 0 and 'size' back onto texel size-1 */
   if (*icoord0 < 0)
      *icoord0 = 1 + *icoord0;
   if (*icoord0 >= (int) size)
      *icoord0 = size - 1;

   if (*icoord1 >= (int) size)
      *icoord1 = size - 1;
   if (*icoord1 < 0)
      *icoord1 = 1 + *icoord1;

   /* in the mirrored case the texels are walked in decreasing order */
   *w = (no_mirror) ? frac(u) : frac(1.0f - u);
}
365

366

367
/**
 * Mirror-clamp wrap mode for linear sampling.
 *
 * Takes the absolute value of the scaled, offset coordinate, limits it to
 * at most 'size' and applies the half-texel shift.  The returned indices
 * are not clamped, so icoord0 may be -1 and icoord1 may be 'size'.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp(float s, unsigned size, int offset,
                         int *icoord0, int *icoord1, float *w)
{
   float u = fabsf(s * size + offset);
   if (u >= size)
      u = (float) size;
   u -= 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
379

380

381
/**
 * Mirror-clamp-to-edge wrap mode for linear sampling.
 *
 * Like wrap_linear_mirror_clamp(), but the two texel indices are
 * additionally clamped so that icoord0 >= 0 and icoord1 <= size-1.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
                                 int *icoord0, int *icoord1, float *w)
{
   float u = fabsf(s * size + offset);
   if (u >= size)
      u = (float) size;
   u -= 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord0 < 0)
      *icoord0 = 0;
   if (*icoord1 >= (int) size)
      *icoord1 = size - 1;
   *w = frac(u);
}
397

398

399
/**
 * Mirror-clamp-to-border wrap mode for linear sampling.
 *
 * Takes the absolute value of the scaled, offset coordinate, clamps it to
 * [-0.5, size + 0.5] and applies the half-texel shift.  The returned
 * indices are not clamped and may lie outside [0, size-1].
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param offset  integer texel offset added to the scaled coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
                                   int *icoord0, int *icoord1, float *w)
{
   const float min = -0.5F;
   const float max = size + 0.5F;
   const float t = fabsf(s * size + offset);
   const float u = CLAMP(t, min, max) - 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
411

412

413
/**
 * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the floored coordinate
 * \param icoord  returns the texel index, clamped to [0, size-1]
 */
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
{
   const int i = util_ifloor(s);
   *icoord = CLAMP(i + offset, 0, (int) size-1);
}
422

423

424
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord  returns the floor of (s + offset) clamped to
 *                [-0.5, size + 0.5]
 */
static void
wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) );
}
432

433

434
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord  returns the floor of (s + offset) clamped to
 *                [0.5, size - 0.5]
 */
static void
wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) );
}
442

443

444
/**
 * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1, not clamped)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_unorm_clamp(float s, unsigned size, int offset,
                        int *icoord0, int *icoord1, float *w)
{
   /* Not exactly what the spec says, but it matches NVIDIA output */
   const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
457

458

459
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 *
 * The offset coordinate is clamped to [-0.5, size + 0.5] before the
 * half-texel shift; icoord1 is limited to size-1 while icoord0 is left
 * unclamped.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
                                  int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord1 > (int) size - 1)
      *icoord1 = size - 1;
   *w = frac(u);
}
473

474

475
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 *
 * The offset coordinate is clamped to [0.5, size - 0.5] before the
 * half-texel shift, and icoord1 is limited to size-1.
 *
 * \param s  the texcoord, already in texel units
 * \param size  the texture image size
 * \param offset  integer texel offset added to the coordinate
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
                                int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord1 > (int) size - 1)
      *icoord1 = size - 1;
   *w = frac(u);
}
489

490

491
/**
 * Do coordinate to array index conversion.  For array textures.
 *
 * The coordinate is rounded via util_ifloor(coord + 0.5) and the result is
 * clamped to [first_layer, last_layer].
 *
 * \param coord  the layer coordinate
 * \param first_layer  lowest layer index that may be returned
 * \param last_layer  highest layer index that may be returned
 * \return the layer index
 */
static inline int
coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
{
   const int c = util_ifloor(coord + 0.5F);
   return CLAMP(c, (int)first_layer, (int)last_layer);
}
500

501
static void
502
compute_gradient_1d(const float s[TGSI_QUAD_SIZE],
503
                    const float t[TGSI_QUAD_SIZE],
504
                    const float p[TGSI_QUAD_SIZE],
505
                    float derivs[3][2][TGSI_QUAD_SIZE])
506
{
507
   memset(derivs, 0, 6 * TGSI_QUAD_SIZE * sizeof(float));
508
   derivs[0][0][0] = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
509
   derivs[0][1][0] = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
510
}
511

512
static float
513
compute_lambda_1d_explicit_gradients(const struct sp_sampler_view *sview,
514
                                     const float derivs[3][2][TGSI_QUAD_SIZE],
515
                                     uint quad)
516
{
517
   const struct pipe_resource *texture = sview->base.texture;
518
   const float dsdx = fabsf(derivs[0][0][quad]);
519
   const float dsdy = fabsf(derivs[0][1][quad]);
520
   const float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
521
   return util_fast_log2(rho);
522
}
523

524

525
/**
526
 * Examine the quad's texture coordinates to compute the partial
527
 * derivatives w.r.t X and Y, then compute lambda (level of detail).
528
 */
529
static float
530
compute_lambda_1d(const struct sp_sampler_view *sview,
531
                  const float s[TGSI_QUAD_SIZE],
532
                  const float t[TGSI_QUAD_SIZE],
533
                  const float p[TGSI_QUAD_SIZE])
534
{
535
   float derivs[3][2][TGSI_QUAD_SIZE];
536
   compute_gradient_1d(s, t, p, derivs);
537
   return compute_lambda_1d_explicit_gradients(sview, derivs, 0);
538
}
539

540

541
static void
542
compute_gradient_2d(const float s[TGSI_QUAD_SIZE],
543
                    const float t[TGSI_QUAD_SIZE],
544
                    const float p[TGSI_QUAD_SIZE],
545
                    float derivs[3][2][TGSI_QUAD_SIZE])
546
{
547
   memset(derivs, 0, 6 * TGSI_QUAD_SIZE * sizeof(float));
548
   derivs[0][0][0] = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
549
   derivs[0][1][0] = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
550
   derivs[1][0][0] = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
551
   derivs[1][1][0] = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
552
}
553

554
static float
555
compute_lambda_2d_explicit_gradients(const struct sp_sampler_view *sview,
556
                                     const float derivs[3][2][TGSI_QUAD_SIZE],
557
                                     uint quad)
558
{
559
   const struct pipe_resource *texture = sview->base.texture;
560
   const float dsdx = fabsf(derivs[0][0][quad]);
561
   const float dsdy = fabsf(derivs[0][1][quad]);
562
   const float dtdx = fabsf(derivs[1][0][quad]);
563
   const float dtdy = fabsf(derivs[1][1][quad]);
564
   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
565
   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
566
   const float rho  = MAX2(maxx, maxy);
567
   return util_fast_log2(rho);
568
}
569

570

571
static float
572
compute_lambda_2d(const struct sp_sampler_view *sview,
573
                  const float s[TGSI_QUAD_SIZE],
574
                  const float t[TGSI_QUAD_SIZE],
575
                  const float p[TGSI_QUAD_SIZE])
576
{
577
   float derivs[3][2][TGSI_QUAD_SIZE];
578
   compute_gradient_2d(s, t, p, derivs);
579
   return compute_lambda_2d_explicit_gradients(sview, derivs, 0);
580
}
581

582

583
static void
584
compute_gradient_3d(const float s[TGSI_QUAD_SIZE],
585
                    const float t[TGSI_QUAD_SIZE],
586
                    const float p[TGSI_QUAD_SIZE],
587
                    float derivs[3][2][TGSI_QUAD_SIZE])
588
{
589
   memset(derivs, 0, 6 * TGSI_QUAD_SIZE * sizeof(float));
590
   derivs[0][0][0] = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
591
   derivs[0][1][0] = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
592
   derivs[1][0][0] = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
593
   derivs[1][1][0] = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
594
   derivs[2][0][0] = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
595
   derivs[2][1][0] = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
596
}
597

598
static float
599
compute_lambda_3d_explicit_gradients(const struct sp_sampler_view *sview,
600
                                     const float derivs[3][2][TGSI_QUAD_SIZE],
601
                                     uint quad)
602
{
603
   const struct pipe_resource *texture = sview->base.texture;
604
   const float dsdx = fabsf(derivs[0][0][quad]);
605
   const float dsdy = fabsf(derivs[0][1][quad]);
606
   const float dtdx = fabsf(derivs[1][0][quad]);
607
   const float dtdy = fabsf(derivs[1][1][quad]);
608
   const float dpdx = fabsf(derivs[2][0][quad]);
609
   const float dpdy = fabsf(derivs[2][1][quad]);
610
   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
611
   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
612
   const float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
613
   const float rho = MAX3(maxx, maxy, maxz);
614

615
   return util_fast_log2(rho);
616
}
617

618

619
static float
620
compute_lambda_3d(const struct sp_sampler_view *sview,
621
                  const float s[TGSI_QUAD_SIZE],
622
                  const float t[TGSI_QUAD_SIZE],
623
                  const float p[TGSI_QUAD_SIZE])
624
{
625
   float derivs[3][2][TGSI_QUAD_SIZE];
626
   compute_gradient_3d(s, t, p, derivs);
627
   return compute_lambda_3d_explicit_gradients(sview, derivs, 0);
628
}
629

630

631
static float
632
compute_lambda_cube_explicit_gradients(const struct sp_sampler_view *sview,
633
                                       const float derivs[3][2][TGSI_QUAD_SIZE],
634
                                       uint quad)
635
{
636
   const struct pipe_resource *texture = sview->base.texture;
637
   const float dsdx = fabsf(derivs[0][0][quad]);
638
   const float dsdy = fabsf(derivs[0][1][quad]);
639
   const float dtdx = fabsf(derivs[1][0][quad]);
640
   const float dtdy = fabsf(derivs[1][1][quad]);
641
   const float dpdx = fabsf(derivs[2][0][quad]);
642
   const float dpdy = fabsf(derivs[2][1][quad]);
643
   const float maxx = MAX2(dsdx, dsdy);
644
   const float maxy = MAX2(dtdx, dtdy);
645
   const float maxz = MAX2(dpdx, dpdy);
646
   const float rho = MAX3(maxx, maxy, maxz) * u_minify(texture->width0, sview->base.u.tex.first_level) / 2.0f;
647

648
   return util_fast_log2(rho);
649
}
650

651
static float
652
compute_lambda_cube(const struct sp_sampler_view *sview,
653
                    const float s[TGSI_QUAD_SIZE],
654
                    const float t[TGSI_QUAD_SIZE],
655
                    const float p[TGSI_QUAD_SIZE])
656
{
657
   float derivs[3][2][TGSI_QUAD_SIZE];
658
   compute_gradient_3d(s, t, p, derivs);
659
   return compute_lambda_cube_explicit_gradients(sview, derivs, 0);
660
}
661

662
/**
663
 * Compute lambda for a vertex texture sampler.
664
 * Since there aren't derivatives to use, just return 0.
665
 */
666
static float
667
compute_lambda_vert(const struct sp_sampler_view *sview,
668
                    const float s[TGSI_QUAD_SIZE],
669
                    const float t[TGSI_QUAD_SIZE],
670
                    const float p[TGSI_QUAD_SIZE])
671
{
672
   return 0.0f;
673
}
674

675

676
compute_lambda_from_grad_func
677
softpipe_get_lambda_from_grad_func(const struct pipe_sampler_view *view,
678
                                   enum pipe_shader_type shader)
679
{
680
   switch (view->target) {
681
   case PIPE_BUFFER:
682
   case PIPE_TEXTURE_1D:
683
   case PIPE_TEXTURE_1D_ARRAY:
684
      return compute_lambda_1d_explicit_gradients;
685
   case PIPE_TEXTURE_2D:
686
   case PIPE_TEXTURE_2D_ARRAY:
687
   case PIPE_TEXTURE_RECT:
688
      return compute_lambda_2d_explicit_gradients;
689
   case PIPE_TEXTURE_CUBE:
690
   case PIPE_TEXTURE_CUBE_ARRAY:
691
      return compute_lambda_cube_explicit_gradients;
692
   case PIPE_TEXTURE_3D:
693
      return compute_lambda_3d_explicit_gradients;
694
   default:
695
      assert(0);
696
      return compute_lambda_1d_explicit_gradients;
697
   }
698
}
699

700

701
/**
 * Get a texel from a texture, using the texture tile cache.
 *
 * \param addr  the template tex address containing cube, z, face info.
 * \param x  the x coord of texel within 2D image
 * \param y  the y coord of texel within 2D image
 * \param rgba  the quad to put the texel/color into
 *
 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 * sp_get_cached_tile_tex() function.
 */
712

713

714

715
static inline const float *
716
get_texel_buffer_no_border(const struct sp_sampler_view *sp_sview,
717
                           union tex_tile_address addr, int x, unsigned elmsize)
718
{
719
   const struct softpipe_tex_cached_tile *tile;
720
   addr.bits.x = x * elmsize / TEX_TILE_SIZE;
721
   assert(x * elmsize / TEX_TILE_SIZE == addr.bits.x);
722

723
   x %= TEX_TILE_SIZE / elmsize;
724

725
   tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
726

727
   return &tile->data.color[0][x][0];
728
}
729

730

731
static inline const float *
732
get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
733
                       union tex_tile_address addr, int x, int y)
734
{
735
   const struct softpipe_tex_cached_tile *tile;
736
   addr.bits.x = x / TEX_TILE_SIZE;
737
   addr.bits.y = y / TEX_TILE_SIZE;
738
   y %= TEX_TILE_SIZE;
739
   x %= TEX_TILE_SIZE;
740

741
   tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
742

743
   return &tile->data.color[y][x][0];
744
}
745

746

747
static inline const float *
748
get_texel_2d(const struct sp_sampler_view *sp_sview,
749
             const struct sp_sampler *sp_samp,
750
             union tex_tile_address addr, int x, int y)
751
{
752
   const struct pipe_resource *texture = sp_sview->base.texture;
753
   const unsigned level = addr.bits.level;
754

755
   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
756
       y < 0 || y >= (int) u_minify(texture->height0, level)) {
757
      return sp_sview->border_color.f;
758
   }
759
   else {
760
      return get_texel_2d_no_border( sp_sview, addr, x, y );
761
   }
762
}
763

764

765
/*
 * Here's the complete logic (HOLY CRAP) for finding next face and doing the
 * corresponding coord wrapping, implemented by get_next_face,
 * get_next_xcoord, get_next_ycoord.
 * Read like that (first line):
 * If face is +x and s coord is below zero, then
 * new face is +z, new s is max , new t is old t
 * (max is always cube size - 1).
 *
 * +x s- -> +z: s = max,   t = t
 * +x s+ -> -z: s = 0,     t = t
 * +x t- -> +y: s = max,   t = max-s
 * +x t+ -> -y: s = max,   t = s
 *
 * -x s- -> -z: s = max,   t = t
 * -x s+ -> +z: s = 0,     t = t
 * -x t- -> +y: s = 0,     t = s
 * -x t+ -> -y: s = 0,     t = max-s
 *
 * +y s- -> -x: s = t,     t = 0
 * +y s+ -> +x: s = max-t, t = 0
 * +y t- -> -z: s = max-s, t = 0
 * +y t+ -> +z: s = s,     t = 0
 *
 * -y s- -> -x: s = max-t, t = max
 * -y s+ -> +x: s = t,     t = max
 * -y t- -> +z: s = s,     t = max
 * -y t+ -> -z: s = max-s, t = max
 *
 * +z s- -> -x: s = max,   t = t
 * +z s+ -> +x: s = 0,     t = t
 * +z t- -> +y: s = s,     t = max
 * +z t+ -> -y: s = s,     t = 0
 *
 * -z s- -> +x: s = max,   t = t
 * -z s+ -> -x: s = 0,     t = t
 * -z t- -> +y: s = max-s, t = 0
 * -z t+ -> -y: s = max-s, t = max
 */
804

805

806
/*
807
 * seamless cubemap neighbour array.
808
 * this array is used to find the adjacent face in each of 4 directions,
809
 * left, right, up, down. (or -x, +x, -y, +y).
810
 */
811
static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
812
   /* pos X first then neg X is Z different, Y the same */
813
   /* PIPE_TEX_FACE_POS_X,*/
814
   { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
815
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
816
   /* PIPE_TEX_FACE_NEG_X */
817
   { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
818
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
819

820
   /* pos Y first then neg Y is X different, X the same */
821
   /* PIPE_TEX_FACE_POS_Y */
822
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
823
     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
824

825
   /* PIPE_TEX_FACE_NEG_Y */
826
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
827
     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
828

829
   /* pos Z first then neg Y is X different, X the same */
830
   /* PIPE_TEX_FACE_POS_Z */
831
   { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
832
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
833

834
   /* PIPE_TEX_FACE_NEG_Z */
835
   { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
836
     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
837
};
838

839
static inline unsigned
840
get_next_face(unsigned face, int idx)
841
{
842
   return face_array[face][idx];
843
}
844

845
/*
 * return a new xcoord based on old face, old coords, cube size
 * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 *
 * Faces are numbered in face_array row order:
 * 0 = +x, 1 = -x, 2 = +y, 3 = -y, 4 = +z, 5 = -z.
 * max is the largest valid coordinate; xc, yc are the old coordinates.
 */
static inline int
get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
   /* transitions that land on the last column of the new face */
   if ((face == 0 && fall_off_index != 1) ||
       (face == 1 && fall_off_index == 0) ||
       (face == 4 && fall_off_index == 0) ||
       (face == 5 && fall_off_index == 0)) {
      return max;
   }
   /* transitions that land on the first column of the new face */
   if ((face == 1 && fall_off_index != 0) ||
       (face == 0 && fall_off_index == 1) ||
       (face == 4 && fall_off_index == 1) ||
       (face == 5 && fall_off_index == 1)) {
      return 0;
   }
   /* x is carried over unchanged */
   if ((face == 4 && fall_off_index >= 2) ||
       (face == 2 && fall_off_index == 3) ||
       (face == 3 && fall_off_index == 2)) {
      return xc;
   }
   /* x is carried over reversed */
   if ((face == 5 && fall_off_index >= 2) ||
       (face == 2 && fall_off_index == 2) ||
       (face == 3 && fall_off_index == 3)) {
      return max - xc;
   }
   /* old y becomes new x */
   if ((face == 2 && fall_off_index == 0) ||
       (face == 3 && fall_off_index == 1)) {
      return yc;
   }
   /* (face == 2 && fall_off_index == 1) ||
      (face == 3 && fall_off_index == 0)) */
   return max - yc;
}
882

883
/*
 * return a new ycoord based on old face, old coords, cube size
 * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 *
 * Faces are numbered in face_array row order:
 * 0 = +x, 1 = -x, 2 = +y, 3 = -y, 4 = +z, 5 = -z.
 * max is the largest valid coordinate; xc, yc are the old coordinates.
 */
static inline int
get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
   /* leaving an x or z face horizontally keeps y */
   if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
      return yc;
   }
   /* transitions that land on the first row of the new face */
   if (face == 2 ||
       (face == 4 && fall_off_index == 3) ||
       (face == 5 && fall_off_index == 2)) {
      return 0;
   }
   /* transitions that land on the last row of the new face */
   if (face == 3 ||
       (face == 4 && fall_off_index == 2) ||
       (face == 5 && fall_off_index == 3)) {
      return max;
   }
   /* old x becomes new y */
   if ((face == 0 && fall_off_index == 3) ||
       (face == 1 && fall_off_index == 2)) {
      return xc;
   }
   /* (face == 0 && fall_off_index == 2) ||
      (face == 1 && fall_off_index == 3) */
   return max - xc;
}
911

912

913
/* Gather a quad of adjacent texels within a tile:
914
 */
915
static inline void
916
get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
917
                                        union tex_tile_address addr,
918
                                        unsigned x, unsigned y,
919
                                        const float *out[4])
920
{
921
    const struct softpipe_tex_cached_tile *tile;
922

923
   addr.bits.x = x / TEX_TILE_SIZE;
924
   addr.bits.y = y / TEX_TILE_SIZE;
925
   y %= TEX_TILE_SIZE;
926
   x %= TEX_TILE_SIZE;
927

928
   tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
929
      
930
   out[0] = &tile->data.color[y  ][x  ][0];
931
   out[1] = &tile->data.color[y  ][x+1][0];
932
   out[2] = &tile->data.color[y+1][x  ][0];
933
   out[3] = &tile->data.color[y+1][x+1][0];
934
}
935

936

937
/* Gather a quad of potentially non-adjacent texels:
938
 */
939
static inline void
940
get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
941
                            union tex_tile_address addr,
942
                            int x0, int y0,
943
                            int x1, int y1,
944
                            const float *out[4])
945
{
946
   out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
947
   out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
948
   out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
949
   out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
950
}
951

952

953
/* 3d variants:
954
 */
955
static inline const float *
956
get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
957
                       union tex_tile_address addr, int x, int y, int z)
958
{
959
   const struct softpipe_tex_cached_tile *tile;
960

961
   addr.bits.x = x / TEX_TILE_SIZE;
962
   addr.bits.y = y / TEX_TILE_SIZE;
963
   addr.bits.z = z;
964
   y %= TEX_TILE_SIZE;
965
   x %= TEX_TILE_SIZE;
966

967
   tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
968

969
   return &tile->data.color[y][x][0];
970
}
971

972

973
static inline const float *
974
get_texel_3d(const struct sp_sampler_view *sp_sview,
975
             const struct sp_sampler *sp_samp,
976
             union tex_tile_address addr, int x, int y, int z)
977
{
978
   const struct pipe_resource *texture = sp_sview->base.texture;
979
   const unsigned level = addr.bits.level;
980

981
   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
982
       y < 0 || y >= (int) u_minify(texture->height0, level) ||
983
       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
984
      return sp_sview->border_color.f;
985
   }
986
   else {
987
      return get_texel_3d_no_border( sp_sview, addr, x, y, z );
988
   }
989
}
990

991

992
/* Get texel pointer for 1D array texture */
993
static inline const float *
994
get_texel_1d_array(const struct sp_sampler_view *sp_sview,
995
                   const struct sp_sampler *sp_samp,
996
                   union tex_tile_address addr, int x, int y)
997
{
998
   const struct pipe_resource *texture = sp_sview->base.texture;
999
   const unsigned level = addr.bits.level;
1000

1001
   if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
1002
      return sp_sview->border_color.f;
1003
   }
1004
   else {
1005
      return get_texel_2d_no_border(sp_sview, addr, x, y);
1006
   }
1007
}
1008

1009

1010
/* Get texel pointer for 2D array texture */
1011
static inline const float *
1012
get_texel_2d_array(const struct sp_sampler_view *sp_sview,
1013
                   const struct sp_sampler *sp_samp,
1014
                   union tex_tile_address addr, int x, int y, int layer)
1015
{
1016
   const struct pipe_resource *texture = sp_sview->base.texture;
1017
   const unsigned level = addr.bits.level;
1018

1019
   assert(layer < (int) texture->array_size);
1020
   assert(layer >= 0);
1021

1022
   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
1023
       y < 0 || y >= (int) u_minify(texture->height0, level)) {
1024
      return sp_sview->border_color.f;
1025
   }
1026
   else {
1027
      return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
1028
   }
1029
}
1030

1031

1032
static inline const float *
1033
get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
1034
                        union tex_tile_address addr, int x, int y,
1035
                        float *corner, int layer, unsigned face)
1036
{
1037
   const struct pipe_resource *texture = sp_sview->base.texture;
1038
   const unsigned level = addr.bits.level;
1039
   int new_x, new_y, max_x;
1040

1041
   max_x = (int) u_minify(texture->width0, level);
1042

1043
   assert(texture->width0 == texture->height0);
1044
   new_x = x;
1045
   new_y = y;
1046

1047
   /* change the face */
1048
   if (x < 0) {
1049
      /*
1050
       * Cheat with corners. They are difficult and I believe because we don't get
1051
       * per-pixel faces we can actually have multiple corner texels per pixel,
1052
       * which screws things up majorly in any case (as the per spec behavior is
1053
       * to average the 3 remaining texels, which we might not have).
1054
       * Hence just make sure that the 2nd coord is clamped, will simply pick the
1055
       * sample which would have fallen off the x coord, but not y coord.
1056
       * So the filter weight of the samples will be wrong, but at least this
1057
       * ensures that only valid texels near the corner are used.
1058
       */
1059
      if (y < 0 || y >= max_x) {
1060
         y = CLAMP(y, 0, max_x - 1);
1061
      }
1062
      new_x = get_next_xcoord(face, 0, max_x -1, x, y);
1063
      new_y = get_next_ycoord(face, 0, max_x -1, x, y);
1064
      face = get_next_face(face, 0);
1065
   } else if (x >= max_x) {
1066
      if (y < 0 || y >= max_x) {
1067
         y = CLAMP(y, 0, max_x - 1);
1068
      }
1069
      new_x = get_next_xcoord(face, 1, max_x -1, x, y);
1070
      new_y = get_next_ycoord(face, 1, max_x -1, x, y);
1071
      face = get_next_face(face, 1);
1072
   } else if (y < 0) {
1073
      new_x = get_next_xcoord(face, 2, max_x -1, x, y);
1074
      new_y = get_next_ycoord(face, 2, max_x -1, x, y);
1075
      face = get_next_face(face, 2);
1076
   } else if (y >= max_x) {
1077
      new_x = get_next_xcoord(face, 3, max_x -1, x, y);
1078
      new_y = get_next_ycoord(face, 3, max_x -1, x, y);
1079
      face = get_next_face(face, 3);
1080
   }
1081

1082
   return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
1083
}
1084

1085

1086
/* Get texel pointer for cube array texture */
1087
static inline const float *
1088
get_texel_cube_array(const struct sp_sampler_view *sp_sview,
1089
                     const struct sp_sampler *sp_samp,
1090
                     union tex_tile_address addr, int x, int y, int layer)
1091
{
1092
   const struct pipe_resource *texture = sp_sview->base.texture;
1093
   const unsigned level = addr.bits.level;
1094

1095
   assert(layer < (int) texture->array_size);
1096
   assert(layer >= 0);
1097

1098
   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
1099
       y < 0 || y >= (int) u_minify(texture->height0, level)) {
1100
      return sp_sview->border_color.f;
1101
   }
1102
   else {
1103
      return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
1104
   }
1105
}
1106
/**
 * Given the logbase2 of a mipmap's base level size and a mipmap level,
 * return the size (in texels) of that mipmap level.
 * For example, if level[0].width = 256 then base_pot will be 8.
 * If level = 2, then we'll return 64 (the width at level=2).
 * Return 1 if level > base_pot.
 *
 * base_pot - level must be smaller than the bit width of unsigned.
 */
static inline unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   /*
    * Shift an unsigned one: shifting the signed constant 1 into the sign
    * bit (a shift count of 31) would be undefined behaviour, while the
    * unsigned shift is well defined for every count below the type width.
    */
   return (base_pot >= level) ? (1u << (base_pot - level)) : 1u;
}
1118

1119

1120
static void
1121
print_sample(const char *function, const float *rgba)
1122
{
1123
   debug_printf("%s %g %g %g %g\n",
1124
                function,
1125
                rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
1126
}
1127

1128

1129
static void
1130
print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1131
{
1132
   debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1133
                function,
1134
                rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1135
                rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1136
                rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1137
                rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1138
}
1139

1140

1141
/* Some image-filter fastpaths:
1142
 */
1143
static inline void
1144
img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
1145
                                const struct sp_sampler *sp_samp,
1146
                                const struct img_filter_args *args,
1147
                                float *rgba)
1148
{
1149
   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1150
   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1151
   const int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1152
   const int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1153
   union tex_tile_address addr;
1154
   int c;
1155

1156
   const float u = (args->s * xpot - 0.5F) + args->offset[0];
1157
   const float v = (args->t * ypot - 0.5F) + args->offset[1];
1158

1159
   const int uflr = util_ifloor(u);
1160
   const int vflr = util_ifloor(v);
1161

1162
   const float xw = u - (float)uflr;
1163
   const float yw = v - (float)vflr;
1164

1165
   const int x0 = uflr & (xpot - 1);
1166
   const int y0 = vflr & (ypot - 1);
1167

1168
   const float *tx[4];
1169
      
1170
   addr.value = 0;
1171
   addr.bits.level = args->level;
1172
   addr.bits.z = sp_sview->base.u.tex.first_layer;
1173

1174
   /* Can we fetch all four at once:
1175
    */
1176
   if (x0 < xmax && y0 < ymax) {
1177
      get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1178
   }
1179
   else {
1180
      const unsigned x1 = (x0 + 1) & (xpot - 1);
1181
      const unsigned y1 = (y0 + 1) & (ypot - 1);
1182
      get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1183
   }
1184

1185
   /* interpolate R, G, B, A */
1186
   for (c = 0; c < TGSI_NUM_CHANNELS; c++) {
1187
      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw, 
1188
                                       tx[0][c], tx[1][c], 
1189
                                       tx[2][c], tx[3][c]);
1190
   }
1191

1192
   if (DEBUG_TEX) {
1193
      print_sample(__FUNCTION__, rgba);
1194
   }
1195
}
1196

1197

1198
static inline void
1199
img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
1200
                                 const struct sp_sampler *sp_samp,
1201
                                 const struct img_filter_args *args,
1202
                                 float *rgba)
1203
{
1204
   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1205
   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1206
   const float *out;
1207
   union tex_tile_address addr;
1208
   int c;
1209

1210
   const float u = args->s * xpot + args->offset[0];
1211
   const float v = args->t * ypot + args->offset[1];
1212

1213
   const int uflr = util_ifloor(u);
1214
   const int vflr = util_ifloor(v);
1215

1216
   const int x0 = uflr & (xpot - 1);
1217
   const int y0 = vflr & (ypot - 1);
1218

1219
   addr.value = 0;
1220
   addr.bits.level = args->level;
1221
   addr.bits.z = sp_sview->base.u.tex.first_layer;
1222

1223
   out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1224
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1225
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1226

1227
   if (DEBUG_TEX) {
1228
      print_sample(__FUNCTION__, rgba);
1229
   }
1230
}
1231

1232

1233
static inline void
1234
img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
1235
                                const struct sp_sampler *sp_samp,
1236
                                const struct img_filter_args *args,
1237
                                float *rgba)
1238
{
1239
   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1240
   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1241
   union tex_tile_address addr;
1242
   int c;
1243

1244
   const float u = args->s * xpot + args->offset[0];
1245
   const float v = args->t * ypot + args->offset[1];
1246

1247
   int x0, y0;
1248
   const float *out;
1249

1250
   addr.value = 0;
1251
   addr.bits.level = args->level;
1252
   addr.bits.z = sp_sview->base.u.tex.first_layer;
1253

1254
   x0 = util_ifloor(u);
1255
   if (x0 < 0) 
1256
      x0 = 0;
1257
   else if (x0 > (int) xpot - 1)
1258
      x0 = xpot - 1;
1259

1260
   y0 = util_ifloor(v);
1261
   if (y0 < 0) 
1262
      y0 = 0;
1263
   else if (y0 > (int) ypot - 1)
1264
      y0 = ypot - 1;
1265
   
1266
   out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1267
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1268
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1269

1270
   if (DEBUG_TEX) {
1271
      print_sample(__FUNCTION__, rgba);
1272
   }
1273
}
1274

1275

1276
static void
1277
img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
1278
                      const struct sp_sampler *sp_samp,
1279
                      const struct img_filter_args *args,
1280
                      float *rgba)
1281
{
1282
   const struct pipe_resource *texture = sp_sview->base.texture;
1283
   const int width = u_minify(texture->width0, args->level);
1284
   int x;
1285
   union tex_tile_address addr;
1286
   const float *out;
1287
   int c;
1288

1289
   assert(width > 0);
1290

1291
   addr.value = 0;
1292
   addr.bits.level = args->level;
1293

1294
   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1295

1296
   out = get_texel_1d_array(sp_sview, sp_samp, addr, x,
1297
                            sp_sview->base.u.tex.first_layer);
1298
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1299
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1300

1301
   if (DEBUG_TEX) {
1302
      print_sample(__FUNCTION__, rgba);
1303
   }
1304
}
1305

1306

1307
static void
1308
img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
1309
                            const struct sp_sampler *sp_samp,
1310
                            const struct img_filter_args *args,
1311
                            float *rgba)
1312
{
1313
   const struct pipe_resource *texture = sp_sview->base.texture;
1314
   const int width = u_minify(texture->width0, args->level);
1315
   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1316
                                    sp_sview->base.u.tex.last_layer);
1317
   int x;
1318
   union tex_tile_address addr;
1319
   const float *out;
1320
   int c;
1321

1322
   assert(width > 0);
1323

1324
   addr.value = 0;
1325
   addr.bits.level = args->level;
1326

1327
   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1328

1329
   out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1330
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1331
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1332

1333
   if (DEBUG_TEX) {
1334
      print_sample(__FUNCTION__, rgba);
1335
   }
1336
}
1337

1338

1339
static void
1340
img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
1341
                      const struct sp_sampler *sp_samp,
1342
                      const struct img_filter_args *args,
1343
                      float *rgba)
1344
{
1345
   const struct pipe_resource *texture = sp_sview->base.texture;
1346
   const int width = u_minify(texture->width0, args->level);
1347
   const int height = u_minify(texture->height0, args->level);
1348
   int x, y;
1349
   union tex_tile_address addr;
1350
   const float *out;
1351
   int c;
1352

1353
   assert(width > 0);
1354
   assert(height > 0);
1355
 
1356
   addr.value = 0;
1357
   addr.bits.level = args->level;
1358
   addr.bits.z = sp_sview->base.u.tex.first_layer;
1359

1360
   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1361
   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1362

1363
   out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1364
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1365
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1366

1367
   if (DEBUG_TEX) {
1368
      print_sample(__FUNCTION__, rgba);
1369
   }
1370
}
1371

1372

1373
static void
1374
img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
1375
                            const struct sp_sampler *sp_samp,
1376
                            const struct img_filter_args *args,
1377
                            float *rgba)
1378
{
1379
   const struct pipe_resource *texture = sp_sview->base.texture;
1380
   const int width = u_minify(texture->width0, args->level);
1381
   const int height = u_minify(texture->height0, args->level);
1382
   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1383
                                    sp_sview->base.u.tex.last_layer);
1384
   int x, y;
1385
   union tex_tile_address addr;
1386
   const float *out;
1387
   int c;
1388

1389
   assert(width > 0);
1390
   assert(height > 0);
1391
 
1392
   addr.value = 0;
1393
   addr.bits.level = args->level;
1394

1395
   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1396
   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1397

1398
   out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1399
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1400
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1401

1402
   if (DEBUG_TEX) {
1403
      print_sample(__FUNCTION__, rgba);
1404
   }
1405
}
1406

1407

1408
static void
1409
img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
1410
                        const struct sp_sampler *sp_samp,
1411
                        const struct img_filter_args *args,
1412
                        float *rgba)
1413
{
1414
   const struct pipe_resource *texture = sp_sview->base.texture;
1415
   const int width = u_minify(texture->width0, args->level);
1416
   const int height = u_minify(texture->height0, args->level);
1417
   const int layerface = args->face_id + sp_sview->base.u.tex.first_layer;
1418
   int x, y;
1419
   union tex_tile_address addr;
1420
   const float *out;
1421
   int c;
1422

1423
   assert(width > 0);
1424
   assert(height > 0);
1425
 
1426
   addr.value = 0;
1427
   addr.bits.level = args->level;
1428

1429
   /*
1430
    * If NEAREST filtering is done within a miplevel, always apply wrap
1431
    * mode CLAMP_TO_EDGE.
1432
    */
1433
   if (sp_samp->base.seamless_cube_map) {
1434
      wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
1435
      wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
1436
   } else {
1437
      /* Would probably make sense to ignore mode and just do edge clamp */
1438
      sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1439
      sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1440
   }
1441

1442
   out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1443
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1444
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1445

1446
   if (DEBUG_TEX) {
1447
      print_sample(__FUNCTION__, rgba);
1448
   }
1449
}
1450

1451
static void
1452
img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
1453
                              const struct sp_sampler *sp_samp,
1454
                              const struct img_filter_args *args,
1455
                              float *rgba)
1456
{
1457
   const struct pipe_resource *texture = sp_sview->base.texture;
1458
   const int width = u_minify(texture->width0, args->level);
1459
   const int height = u_minify(texture->height0, args->level);
1460
   const int layerface = CLAMP(6 * util_ifloor(args->p + 0.5f) + sp_sview->base.u.tex.first_layer,
1461
                               sp_sview->base.u.tex.first_layer,
1462
                               sp_sview->base.u.tex.last_layer - 5) + args->face_id;
1463
   int x, y;
1464
   union tex_tile_address addr;
1465
   const float *out;
1466
   int c;
1467

1468
   assert(width > 0);
1469
   assert(height > 0);
1470
 
1471
   addr.value = 0;
1472
   addr.bits.level = args->level;
1473

1474
   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1475
   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1476

1477
   out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1478
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1479
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1480

1481
   if (DEBUG_TEX) {
1482
      print_sample(__FUNCTION__, rgba);
1483
   }
1484
}
1485

1486
static void
1487
img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
1488
                      const struct sp_sampler *sp_samp,
1489
                      const struct img_filter_args *args,
1490
                      float *rgba)
1491
{
1492
   const struct pipe_resource *texture = sp_sview->base.texture;
1493
   const int width = u_minify(texture->width0, args->level);
1494
   const int height = u_minify(texture->height0, args->level);
1495
   const int depth = u_minify(texture->depth0, args->level);
1496
   int x, y, z;
1497
   union tex_tile_address addr;
1498
   const float *out;
1499
   int c;
1500

1501
   assert(width > 0);
1502
   assert(height > 0);
1503
   assert(depth > 0);
1504

1505
   sp_samp->nearest_texcoord_s(args->s, width,  args->offset[0], &x);
1506
   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1507
   sp_samp->nearest_texcoord_p(args->p, depth,  args->offset[2], &z);
1508

1509
   addr.value = 0;
1510
   addr.bits.level = args->level;
1511

1512
   out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1513
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1514
      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1515
}
1516

1517

1518
static void
1519
img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
1520
                     const struct sp_sampler *sp_samp,
1521
                     const struct img_filter_args *args,
1522
                     float *rgba)
1523
{
1524
   const struct pipe_resource *texture = sp_sview->base.texture;
1525
   const int width = u_minify(texture->width0, args->level);
1526
   int x0, x1;
1527
   float xw; /* weights */
1528
   union tex_tile_address addr;
1529
   const float *tx0, *tx1;
1530
   int c;
1531

1532
   assert(width > 0);
1533

1534
   addr.value = 0;
1535
   addr.bits.level = args->level;
1536

1537
   sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1538

1539
   tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0,
1540
                            sp_sview->base.u.tex.first_layer);
1541
   tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1,
1542
                            sp_sview->base.u.tex.first_layer);
1543

1544
   /* interpolate R, G, B, A */
1545
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1546
      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1547
}
1548

1549

1550
static void
1551
img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
1552
                           const struct sp_sampler *sp_samp,
1553
                           const struct img_filter_args *args,
1554
                           float *rgba)
1555
{
1556
   const struct pipe_resource *texture = sp_sview->base.texture;
1557
   const int width = u_minify(texture->width0, args->level);
1558
   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1559
                                    sp_sview->base.u.tex.last_layer);
1560
   int x0, x1;
1561
   float xw; /* weights */
1562
   union tex_tile_address addr;
1563
   const float *tx0, *tx1;
1564
   int c;
1565

1566
   assert(width > 0);
1567

1568
   addr.value = 0;
1569
   addr.bits.level = args->level;
1570

1571
   sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1572

1573
   tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1574
   tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1575

1576
   /* interpolate R, G, B, A */
1577
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1578
      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1579
}
1580

1581
/*
1582
 * Retrieve the gathered value, need to convert to the
1583
 * TGSI expected interface, and take component select
1584
 * and swizzling into account.
1585
 */
1586
static float
1587
get_gather_value(const struct sp_sampler_view *sp_sview,
1588
                 int chan_in, int comp_sel,
1589
                 const float *tx[4])
1590
{
1591
   int chan;
1592
   unsigned swizzle;
1593

1594
   /*
1595
    * softpipe samples in a different order
1596
    * to TGSI expects, so we need to swizzle,
1597
    * the samples into the correct slots.
1598
    */
1599
   switch (chan_in) {
1600
   case 0:
1601
      chan = 2;
1602
      break;
1603
   case 1:
1604
      chan = 3;
1605
      break;
1606
   case 2:
1607
      chan = 1;
1608
      break;
1609
   case 3:
1610
      chan = 0;
1611
      break;
1612
   default:
1613
      assert(0);
1614
      return 0.0;
1615
   }
1616

1617
   /* pick which component to use for the swizzle */
1618
   switch (comp_sel) {
1619
   case 0:
1620
      swizzle = sp_sview->base.swizzle_r;
1621
      break;
1622
   case 1:
1623
      swizzle = sp_sview->base.swizzle_g;
1624
      break;
1625
   case 2:
1626
      swizzle = sp_sview->base.swizzle_b;
1627
      break;
1628
   case 3:
1629
      swizzle = sp_sview->base.swizzle_a;
1630
      break;
1631
   default:
1632
      assert(0);
1633
      return 0.0;
1634
   }
1635

1636
   /* get correct result using the channel and swizzle */
1637
   switch (swizzle) {
1638
   case PIPE_SWIZZLE_0:
1639
      return 0.0;
1640
   case PIPE_SWIZZLE_1:
1641
      return sp_sview->oneval;
1642
   default:
1643
      return tx[chan][swizzle];
1644
   }
1645
}
1646

1647

1648
static void
1649
img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
1650
                     const struct sp_sampler *sp_samp,
1651
                     const struct img_filter_args *args,
1652
                     float *rgba)
1653
{
1654
   const struct pipe_resource *texture = sp_sview->base.texture;
1655
   const int width = u_minify(texture->width0, args->level);
1656
   const int height = u_minify(texture->height0, args->level);
1657
   int x0, y0, x1, y1;
1658
   float xw, yw; /* weights */
1659
   union tex_tile_address addr;
1660
   const float *tx[4];
1661
   int c;
1662

1663
   assert(width > 0);
1664
   assert(height > 0);
1665

1666
   addr.value = 0;
1667
   addr.bits.level = args->level;
1668
   addr.bits.z = sp_sview->base.u.tex.first_layer;
1669

1670
   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1671
   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1672

1673
   tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1674
   tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1675
   tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1676
   tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1677

1678
   if (args->gather_only) {
1679
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1680
         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1681
                                                      args->gather_comp,
1682
                                                      tx);
1683
   } else {
1684
      /* interpolate R, G, B, A */
1685
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1686
         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1687
                                             tx[0][c], tx[1][c],
1688
                                             tx[2][c], tx[3][c]);
1689
   }
1690
}
1691

1692

1693
static void
1694
img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
1695
                           const struct sp_sampler *sp_samp,
1696
                           const struct img_filter_args *args,
1697
                           float *rgba)
1698
{
1699
   const struct pipe_resource *texture = sp_sview->base.texture;
1700
   const int width = u_minify(texture->width0, args->level);
1701
   const int height = u_minify(texture->height0, args->level);
1702
   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1703
                                    sp_sview->base.u.tex.last_layer);
1704
   int x0, y0, x1, y1;
1705
   float xw, yw; /* weights */
1706
   union tex_tile_address addr;
1707
   const float *tx[4];
1708
   int c;
1709

1710
   assert(width > 0);
1711
   assert(height > 0);
1712

1713
   addr.value = 0;
1714
   addr.bits.level = args->level;
1715

1716
   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1717
   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1718

1719
   tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1720
   tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1721
   tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1722
   tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1723

1724
   if (args->gather_only) {
1725
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1726
         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1727
                                                      args->gather_comp,
1728
                                                      tx);
1729
   } else {
1730
      /* interpolate R, G, B, A */
1731
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1732
         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1733
                                             tx[0][c], tx[1][c],
1734
                                             tx[2][c], tx[3][c]);
1735
   }
1736
}
1737

1738

1739
static void
1740
img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
1741
                       const struct sp_sampler *sp_samp,
1742
                       const struct img_filter_args *args,
1743
                       float *rgba)
1744
{
1745
   const struct pipe_resource *texture = sp_sview->base.texture;
1746
   const int width = u_minify(texture->width0, args->level);
1747
   const int height = u_minify(texture->height0, args->level);
1748
   const int layer = sp_sview->base.u.tex.first_layer;
1749
   int x0, y0, x1, y1;
1750
   float xw, yw; /* weights */
1751
   union tex_tile_address addr;
1752
   const float *tx[4];
1753
   float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1754
         corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1755
   int c;
1756

1757
   assert(width > 0);
1758
   assert(height > 0);
1759

1760
   addr.value = 0;
1761
   addr.bits.level = args->level;
1762

1763
   /*
1764
    * For seamless if LINEAR filtering is done within a miplevel,
1765
    * always apply wrap mode CLAMP_TO_BORDER.
1766
    */
1767
   if (sp_samp->base.seamless_cube_map) {
1768
      /* Note this is a bit overkill, actual clamping is not required */
1769
      wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1770
      wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1771
   } else {
1772
      /* Would probably make sense to ignore mode and just do edge clamp */
1773
      sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1774
      sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1775
   }
1776

1777
   if (sp_samp->base.seamless_cube_map) {
1778
      tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1779
      tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1780
      tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1781
      tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1782
   } else {
1783
      tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1784
      tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1785
      tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1786
      tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1787
   }
1788

1789
   if (args->gather_only) {
1790
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1791
         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1792
                                                      args->gather_comp,
1793
                                                      tx);
1794
   } else {
1795
      /* interpolate R, G, B, A */
1796
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1797
         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1798
                                             tx[0][c], tx[1][c],
1799
                                             tx[2][c], tx[3][c]);
1800
   }
1801
}
1802

1803

1804
static void
1805
img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
1806
                             const struct sp_sampler *sp_samp,
1807
                             const struct img_filter_args *args,
1808
                             float *rgba)
1809
{
1810
   const struct pipe_resource *texture = sp_sview->base.texture;
1811
   const int width = u_minify(texture->width0, args->level);
1812
   const int height = u_minify(texture->height0, args->level);
1813

1814
   const int layer = CLAMP(6 * util_ifloor(args->p + 0.5f) + sp_sview->base.u.tex.first_layer,
1815
                           sp_sview->base.u.tex.first_layer,
1816
                           sp_sview->base.u.tex.last_layer - 5);
1817

1818
   int x0, y0, x1, y1;
1819
   float xw, yw; /* weights */
1820
   union tex_tile_address addr;
1821
   const float *tx[4];
1822
   float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1823
         corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1824
   int c;
1825

1826
   assert(width > 0);
1827
   assert(height > 0);
1828

1829
   addr.value = 0;
1830
   addr.bits.level = args->level;
1831

1832
   /*
1833
    * For seamless if LINEAR filtering is done within a miplevel,
1834
    * always apply wrap mode CLAMP_TO_BORDER.
1835
    */
1836
   if (sp_samp->base.seamless_cube_map) {
1837
      /* Note this is a bit overkill, actual clamping is not required */
1838
      wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1839
      wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1840
   } else {
1841
      /* Would probably make sense to ignore mode and just do edge clamp */
1842
      sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1843
      sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1844
   }
1845

1846
   if (sp_samp->base.seamless_cube_map) {
1847
      tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1848
      tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1849
      tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1850
      tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1851
   } else {
1852
      tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1853
      tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1854
      tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1855
      tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1856
   }
1857

1858
   if (args->gather_only) {
1859
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1860
         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1861
                                                      args->gather_comp,
1862
                                                      tx);
1863
   } else {
1864
      /* interpolate R, G, B, A */
1865
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1866
         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1867
                                             tx[0][c], tx[1][c],
1868
                                             tx[2][c], tx[3][c]);
1869
   }
1870
}
1871

1872
static void
1873
img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
1874
                     const struct sp_sampler *sp_samp,
1875
                     const struct img_filter_args *args,
1876
                     float *rgba)
1877
{
1878
   const struct pipe_resource *texture = sp_sview->base.texture;
1879
   const int width = u_minify(texture->width0, args->level);
1880
   const int height = u_minify(texture->height0, args->level);
1881
   const int depth = u_minify(texture->depth0, args->level);
1882
   int x0, x1, y0, y1, z0, z1;
1883
   float xw, yw, zw; /* interpolation weights */
1884
   union tex_tile_address addr;
1885
   const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1886
   int c;
1887

1888
   addr.value = 0;
1889
   addr.bits.level = args->level;
1890

1891
   assert(width > 0);
1892
   assert(height > 0);
1893
   assert(depth > 0);
1894

1895
   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
1896
   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1897
   sp_samp->linear_texcoord_p(args->p, depth,  args->offset[2], &z0, &z1, &zw);
1898

1899
   tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1900
   tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1901
   tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1902
   tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1903
      
1904
   tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1905
   tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1906
   tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1907
   tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1908
      
1909
      /* interpolate R, G, B, A */
1910
   for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1911
      rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1912
                                           tx00[c], tx01[c],
1913
                                           tx02[c], tx03[c],
1914
                                           tx10[c], tx11[c],
1915
                                           tx12[c], tx13[c]);
1916
}
1917

1918

1919
/* Calculate level of detail for every fragment,
1920
 * with lambda already computed.
1921
 * Note that lambda has already been biased by global LOD bias.
1922
 * \param biased_lambda per-quad lambda.
1923
 * \param lod_in per-fragment lod_bias or explicit_lod.
1924
 * \param lod returns the per-fragment lod.
1925
 */
1926
static inline void
1927
compute_lod(const struct pipe_sampler_state *sampler,
1928
            enum tgsi_sampler_control control,
1929
            const float biased_lambda,
1930
            const float lod_in[TGSI_QUAD_SIZE],
1931
            float lod[TGSI_QUAD_SIZE])
1932
{
1933
   const float min_lod = sampler->min_lod;
1934
   const float max_lod = sampler->max_lod;
1935
   uint i;
1936

1937
   switch (control) {
1938
   case TGSI_SAMPLER_LOD_NONE:
1939
   case TGSI_SAMPLER_LOD_ZERO:
1940
      lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1941
      break;
1942
   case TGSI_SAMPLER_DERIVS_EXPLICIT:
1943
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1944
         lod[i] = lod_in[i];
1945
      break;
1946
   case TGSI_SAMPLER_LOD_BIAS:
1947
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1948
         lod[i] = biased_lambda + lod_in[i];
1949
         lod[i] = CLAMP(lod[i], min_lod, max_lod);
1950
      }
1951
      break;
1952
   case TGSI_SAMPLER_LOD_EXPLICIT:
1953
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1954
         lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1955
      }
1956
      break;
1957
   default:
1958
      assert(0);
1959
      lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1960
   }
1961
}
1962

1963

1964
/* Calculate level of detail for every fragment. The computed value is not
1965
 * clamped to lod_min and lod_max.
1966
 * \param lod_in per-fragment lod_bias or explicit_lod.
1967
 * \param lod results per-fragment lod.
1968
 */
1969
static inline void
1970
compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
1971
                             const struct sp_sampler *sp_samp,
1972
                             const float s[TGSI_QUAD_SIZE],
1973
                             const float t[TGSI_QUAD_SIZE],
1974
                             const float p[TGSI_QUAD_SIZE],
1975
                             const float derivs[3][2][TGSI_QUAD_SIZE],
1976
                             const float lod_in[TGSI_QUAD_SIZE],
1977
                             enum tgsi_sampler_control control,
1978
                             float lod[TGSI_QUAD_SIZE])
1979
{
1980
   const struct pipe_sampler_state *sampler = &sp_samp->base;
1981
   const float lod_bias = sampler->lod_bias;
1982
   float lambda;
1983
   uint i;
1984

1985
   switch (control) {
1986
   case TGSI_SAMPLER_LOD_NONE:
1987
      lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1988
      lod[0] = lod[1] = lod[2] = lod[3] = lambda;
1989
      break;
1990
   case TGSI_SAMPLER_DERIVS_EXPLICIT:
1991
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1992
         lod[i] = sp_sview->compute_lambda_from_grad(sp_sview, derivs, i);
1993
      break;
1994
   case TGSI_SAMPLER_LOD_BIAS:
1995
      lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1996
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1997
         lod[i] = lambda + lod_in[i];
1998
      }
1999
      break;
2000
   case TGSI_SAMPLER_LOD_EXPLICIT:
2001
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2002
         lod[i] = lod_in[i] + lod_bias;
2003
      }
2004
      break;
2005
   case TGSI_SAMPLER_LOD_ZERO:
2006
   case TGSI_SAMPLER_GATHER:
2007
      lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
2008
      break;
2009
   default:
2010
      assert(0);
2011
      lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
2012
   }
2013
}
2014

2015
/* Calculate level of detail for every fragment.
2016
 * \param lod_in per-fragment lod_bias or explicit_lod.
2017
 * \param lod results per-fragment lod.
2018
 */
2019
static inline void
2020
compute_lambda_lod(const struct sp_sampler_view *sp_sview,
2021
                   const struct sp_sampler *sp_samp,
2022
                   const float s[TGSI_QUAD_SIZE],
2023
                   const float t[TGSI_QUAD_SIZE],
2024
                   const float p[TGSI_QUAD_SIZE],
2025
                   float derivs[3][2][TGSI_QUAD_SIZE],
2026
                   const float lod_in[TGSI_QUAD_SIZE],
2027
                   enum tgsi_sampler_control control,
2028
                   float lod[TGSI_QUAD_SIZE])
2029
{
2030
   const struct pipe_sampler_state *sampler = &sp_samp->base;
2031
   const float min_lod = sampler->min_lod;
2032
   const float max_lod = sampler->max_lod;
2033
   int i;
2034

2035
   compute_lambda_lod_unclamped(sp_sview, sp_samp,
2036
                                s, t, p, derivs, lod_in, control, lod);
2037
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2038
      lod[i] = CLAMP(lod[i], min_lod, max_lod);
2039
   }
2040
}
2041

2042
static inline unsigned
2043
get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
2044
{
2045
   /* gather component is stored in lod_in slot as unsigned */
2046
   return (*(unsigned int *)lod_in) & 0x3;
2047
}
2048

2049
/**
2050
 * Clamps given lod to both lod limits and mip level limits. Clamping to the
2051
 * latter limits is done so that lod is relative to the first (base) level.
2052
 */
2053
static void
2054
clamp_lod(const struct sp_sampler_view *sp_sview,
2055
          const struct sp_sampler *sp_samp,
2056
          const float lod[TGSI_QUAD_SIZE],
2057
          float clamped[TGSI_QUAD_SIZE])
2058
{
2059
   const float min_lod = sp_samp->base.min_lod;
2060
   const float max_lod = sp_samp->base.max_lod;
2061
   const float min_level = sp_sview->base.u.tex.first_level;
2062
   const float max_level = sp_sview->base.u.tex.last_level;
2063
   int i;
2064

2065
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2066
      float cl = lod[i];
2067

2068
      cl = CLAMP(cl, min_lod, max_lod);
2069
      cl = CLAMP(cl, 0, max_level - min_level);
2070
      clamped[i] = cl;
2071
   }
2072
}
2073

2074
/**
2075
 * Get mip level relative to base level for linear mip filter
2076
 */
2077
static void
2078
mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
2079
                     const struct sp_sampler *sp_samp,
2080
                     const float lod[TGSI_QUAD_SIZE],
2081
                     float level[TGSI_QUAD_SIZE])
2082
{
2083
   clamp_lod(sp_sview, sp_samp, lod, level);
2084
}
2085

2086
static void
2087
mip_filter_linear(const struct sp_sampler_view *sp_sview,
2088
                  const struct sp_sampler *sp_samp,
2089
                  img_filter_func min_filter,
2090
                  img_filter_func mag_filter,
2091
                  const float s[TGSI_QUAD_SIZE],
2092
                  const float t[TGSI_QUAD_SIZE],
2093
                  const float p[TGSI_QUAD_SIZE],
2094
                  int gather_comp,
2095
                  const float lod[TGSI_QUAD_SIZE],
2096
                  const struct filter_args *filt_args,
2097
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2098
{
2099
   const struct pipe_sampler_view *psview = &sp_sview->base;
2100
   int j;
2101
   struct img_filter_args args;
2102

2103
   args.offset = filt_args->offset;
2104
   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2105
   args.gather_comp = gather_comp;
2106

2107
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2108
      const int level0 = psview->u.tex.first_level + (int)lod[j];
2109

2110
      args.s = s[j];
2111
      args.t = t[j];
2112
      args.p = p[j];
2113
      args.face_id = filt_args->faces[j];
2114

2115
      if (lod[j] <= 0.0 && !args.gather_only) {
2116
         args.level = psview->u.tex.first_level;
2117
         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2118
      }
2119
      else if (level0 >= (int) psview->u.tex.last_level) {
2120
         args.level = psview->u.tex.last_level;
2121
         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2122
      }
2123
      else {
2124
         float levelBlend = frac(lod[j]);
2125
         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2126
         int c;
2127

2128
         args.level = level0;
2129
         min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
2130
         args.level = level0+1;
2131
         min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
2132

2133
         for (c = 0; c < 4; c++) {
2134
            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2135
         }
2136
      }
2137
   }
2138

2139
   if (DEBUG_TEX) {
2140
      print_sample_4(__FUNCTION__, rgba);
2141
   }
2142
}
2143

2144

2145
/**
2146
 * Get mip level relative to base level for nearest mip filter
2147
 */
2148
static void
2149
mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
2150
                      const struct sp_sampler *sp_samp,
2151
                      const float lod[TGSI_QUAD_SIZE],
2152
                      float level[TGSI_QUAD_SIZE])
2153
{
2154
   int j;
2155

2156
   clamp_lod(sp_sview, sp_samp, lod, level);
2157
   for (j = 0; j < TGSI_QUAD_SIZE; j++)
2158
      /* TODO: It should rather be:
2159
       * level[j] = ceil(level[j] + 0.5F) - 1.0F;
2160
       */
2161
      level[j] = (int)(level[j] + 0.5F);
2162
}
2163

2164
/**
2165
 * Compute nearest mipmap level from texcoords.
2166
 * Then sample the texture level for four elements of a quad.
2167
 * \param c0  the LOD bias factors, or absolute LODs (depending on control)
2168
 */
2169
static void
2170
mip_filter_nearest(const struct sp_sampler_view *sp_sview,
2171
                   const struct sp_sampler *sp_samp,
2172
                   img_filter_func min_filter,
2173
                   img_filter_func mag_filter,
2174
                   const float s[TGSI_QUAD_SIZE],
2175
                   const float t[TGSI_QUAD_SIZE],
2176
                   const float p[TGSI_QUAD_SIZE],
2177
                   int gather_component,
2178
                   const float lod[TGSI_QUAD_SIZE],
2179
                   const struct filter_args *filt_args,
2180
                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2181
{
2182
   const struct pipe_sampler_view *psview = &sp_sview->base;
2183
   int j;
2184
   struct img_filter_args args;
2185

2186
   args.offset = filt_args->offset;
2187
   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2188
   args.gather_comp = gather_component;
2189

2190
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2191
      args.s = s[j];
2192
      args.t = t[j];
2193
      args.p = p[j];
2194
      args.face_id = filt_args->faces[j];
2195

2196
      if (lod[j] <= 0.0f && !args.gather_only) {
2197
         args.level = psview->u.tex.first_level;
2198
         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2199
      } else {
2200
         const int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
2201
         args.level = MIN2(level, (int)psview->u.tex.last_level);
2202
         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2203
      }
2204
   }
2205

2206
   if (DEBUG_TEX) {
2207
      print_sample_4(__FUNCTION__, rgba);
2208
   }
2209
}
2210

2211

2212
/**
2213
 * Get mip level relative to base level for none mip filter
2214
 */
2215
static void
2216
mip_rel_level_none(const struct sp_sampler_view *sp_sview,
2217
                   const struct sp_sampler *sp_samp,
2218
                   const float lod[TGSI_QUAD_SIZE],
2219
                   float level[TGSI_QUAD_SIZE])
2220
{
2221
   int j;
2222

2223
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2224
      level[j] = 0;
2225
   }
2226
}
2227

2228
static void
2229
mip_filter_none(const struct sp_sampler_view *sp_sview,
2230
                const struct sp_sampler *sp_samp,
2231
                img_filter_func min_filter,
2232
                img_filter_func mag_filter,
2233
                const float s[TGSI_QUAD_SIZE],
2234
                const float t[TGSI_QUAD_SIZE],
2235
                const float p[TGSI_QUAD_SIZE],
2236
                int gather_component,
2237
                const float lod[TGSI_QUAD_SIZE],
2238
                const struct filter_args *filt_args,
2239
                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2240
{
2241
   int j;
2242
   struct img_filter_args args;
2243

2244
   args.level = sp_sview->base.u.tex.first_level;
2245
   args.offset = filt_args->offset;
2246
   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2247
   args.gather_comp = gather_component;
2248

2249
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2250
      args.s = s[j];
2251
      args.t = t[j];
2252
      args.p = p[j];
2253
      args.face_id = filt_args->faces[j];
2254
      if (lod[j] <= 0.0f && !args.gather_only) {
2255
         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2256
      }
2257
      else {
2258
         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2259
      }
2260
   }
2261
}
2262

2263

2264
/**
2265
 * Get mip level relative to base level for none mip filter
2266
 */
2267
static void
2268
mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
2269
                                    const struct sp_sampler *sp_samp,
2270
                                    const float lod[TGSI_QUAD_SIZE],
2271
                                    float level[TGSI_QUAD_SIZE])
2272
{
2273
   mip_rel_level_none(sp_sview, sp_samp, lod, level);
2274
}
2275

2276
static void
2277
mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
2278
                                 const struct sp_sampler *sp_samp,
2279
                                 img_filter_func min_filter,
2280
                                 img_filter_func mag_filter,
2281
                                 const float s[TGSI_QUAD_SIZE],
2282
                                 const float t[TGSI_QUAD_SIZE],
2283
                                 const float p[TGSI_QUAD_SIZE],
2284
                                 int gather_comp,
2285
                                 const float lod_in[TGSI_QUAD_SIZE],
2286
                                 const struct filter_args *filt_args,
2287
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2288
{
2289
   int j;
2290
   struct img_filter_args args;
2291
   args.level = sp_sview->base.u.tex.first_level;
2292
   args.offset = filt_args->offset;
2293
   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2294
   args.gather_comp = gather_comp;
2295
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2296
      args.s = s[j];
2297
      args.t = t[j];
2298
      args.p = p[j];
2299
      args.face_id = filt_args->faces[j];
2300
      mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2301
   }
2302
}
2303

2304

2305
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Lazily created by create_filter_table(); NULL until then */
static const float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds expf(-2 * r2) with r2 = i / (WEIGHT_LUT_SIZE - 1), so the
 * table covers r2 from 0 to 1 inclusive.  The table is built at most once;
 * later calls return immediately.  If the allocation fails the table is
 * left unset (weightLut stays NULL) instead of writing through a null
 * pointer, and a later call will try again.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   unsigned i;
   float *lut;

   if (weightLut)
      return;

   lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!lut)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      const float weight = (float) expf(-alpha * r2);
      lut[i] = weight;
   }

   /* publish only once the table is completely filled in */
   weightLut = lut;
}
2329

2330

2331
/**
2332
 * Elliptical weighted average (EWA) filter for producing high quality
2333
 * anisotropic filtered results.
2334
 * Based on the Higher Quality Elliptical Weighted Average Filter
2335
 * published by Paul S. Heckbert in his Master's Thesis
2336
 * "Fundamentals of Texture Mapping and Image Warping" (1989)
2337
 */
2338
static void
2339
img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
2340
                  const struct sp_sampler *sp_samp,
2341
                  img_filter_func min_filter,
2342
                  img_filter_func mag_filter,
2343
                  const float s[TGSI_QUAD_SIZE],
2344
                  const float t[TGSI_QUAD_SIZE],
2345
                  const float p[TGSI_QUAD_SIZE],
2346
                  const uint faces[TGSI_QUAD_SIZE],
2347
                  const int8_t *offset,
2348
                  unsigned level,
2349
                  const float dudx, const float dvdx,
2350
                  const float dudy, const float dvdy,
2351
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2352
{
2353
   const struct pipe_resource *texture = sp_sview->base.texture;
2354

2355
   // ??? Won't the image filters blow up if level is negative?
2356
   const unsigned level0 = level > 0 ? level : 0;
2357
   const float scaling = 1.0f / (1 << level0);
2358
   const int width = u_minify(texture->width0, level0);
2359
   const int height = u_minify(texture->height0, level0);
2360
   struct img_filter_args args;
2361
   const float ux = dudx * scaling;
2362
   const float vx = dvdx * scaling;
2363
   const float uy = dudy * scaling;
2364
   const float vy = dvdy * scaling;
2365

2366
   /* compute ellipse coefficients to bound the region: 
2367
    * A*x*x + B*x*y + C*y*y = F.
2368
    */
2369
   float A = vx*vx+vy*vy+1;
2370
   float B = -2*(ux*vx+uy*vy);
2371
   float C = ux*ux+uy*uy+1;
2372
   float F = A*C-B*B/4.0f;
2373

2374
   /* check if it is an ellipse */
2375
   /* assert(F > 0.0); */
2376

2377
   /* Compute the ellipse's (u,v) bounding box in texture space */
2378
   const float d = -B*B+4.0f*C*A;
2379
   const float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
2380
   const float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
2381

2382
   float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2383
   float s_buffer[TGSI_QUAD_SIZE];
2384
   float t_buffer[TGSI_QUAD_SIZE];
2385
   float weight_buffer[TGSI_QUAD_SIZE];
2386
   int j;
2387

2388
   /* Scale ellipse formula to directly index the Filter Lookup Table.
2389
    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2390
    */
2391
   const double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
2392
   A *= formScale;
2393
   B *= formScale;
2394
   C *= formScale;
2395
   /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2396

2397
   /* For each quad, the du and dx values are the same and so the ellipse is
2398
    * also the same. Note that texel/image access can only be performed using
2399
    * a quad, i.e. it is not possible to get the pixel value for a single
2400
    * tex coord. In order to have a better performance, the access is buffered
2401
    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
2402
    * full, then the pixel values are read from the image.
2403
    */
2404
   const float ddq = 2 * A;
2405

2406
   args.level = level;
2407
   args.offset = offset;
2408

2409
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2410
      /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2411
       * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2412
       * value, q, is less than F, we're inside the ellipse
2413
       */
2414
      const float tex_u = -0.5F + s[j] * texture->width0 * scaling;
2415
      const float tex_v = -0.5F + t[j] * texture->height0 * scaling;
2416

2417
      const int u0 = (int) floorf(tex_u - box_u);
2418
      const int u1 = (int) ceilf(tex_u + box_u);
2419
      const int v0 = (int) floorf(tex_v - box_v);
2420
      const int v1 = (int) ceilf(tex_v + box_v);
2421
      const float U = u0 - tex_u;
2422

2423
      float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
2424
      unsigned buffer_next = 0;
2425
      float den = 0;
2426
      int v;
2427
      args.face_id = faces[j];
2428

2429
      for (v = v0; v <= v1; ++v) {
2430
         const float V = v - tex_v;
2431
         float dq = A * (2 * U + 1) + B * V;
2432
         float q = (C * V + B * U) * V + A * U * U;
2433

2434
         int u;
2435
         for (u = u0; u <= u1; ++u) {
2436
            /* Note that the ellipse has been pre-scaled so F =
2437
             * WEIGHT_LUT_SIZE - 1
2438
             */
2439
            if (q < WEIGHT_LUT_SIZE) {
2440
               /* as a LUT is used, q must never be negative;
2441
                * should not happen, though
2442
                */
2443
               const int qClamped = q >= 0.0F ? q : 0;
2444
               const float weight = weightLut[qClamped];
2445

2446
               weight_buffer[buffer_next] = weight;
2447
               s_buffer[buffer_next] = u / ((float) width);
2448
               t_buffer[buffer_next] = v / ((float) height);
2449
            
2450
               buffer_next++;
2451
               if (buffer_next == TGSI_QUAD_SIZE) {
2452
                  /* 4 texel coords are in the buffer -> read it now */
2453
                  unsigned jj;
2454
                  /* it is assumed that samp->min_img_filter is set to
2455
                   * img_filter_2d_nearest or one of the
2456
                   * accelerated img_filter_2d_nearest_XXX functions.
2457
                   */
2458
                  for (jj = 0; jj < buffer_next; jj++) {
2459
                     args.s = s_buffer[jj];
2460
                     args.t = t_buffer[jj];
2461
                     args.p = p[jj];
2462
                     min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2463
                     num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2464
                     num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2465
                     num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2466
                     num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2467
                  }
2468

2469
                  buffer_next = 0;
2470
               }
2471

2472
               den += weight;
2473
            }
2474
            q += dq;
2475
            dq += ddq;
2476
         }
2477
      }
2478

2479
      /* if the tex coord buffer contains unread values, we will read
2480
       * them now.
2481
       */
2482
      if (buffer_next > 0) {
2483
         unsigned jj;
2484
         /* it is assumed that samp->min_img_filter is set to
2485
          * img_filter_2d_nearest or one of the
2486
          * accelerated img_filter_2d_nearest_XXX functions.
2487
          */
2488
         for (jj = 0; jj < buffer_next; jj++) {
2489
            args.s = s_buffer[jj];
2490
            args.t = t_buffer[jj];
2491
            args.p = p[jj];
2492
            min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2493
            num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2494
            num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2495
            num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2496
            num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2497
         }
2498
      }
2499

2500
      if (den <= 0.0F) {
2501
         /* Reaching this place would mean that no pixels intersected
2502
          * the ellipse.  This should never happen because the filter
2503
          * we use always intersects at least one pixel.
2504
          */
2505

2506
         /*rgba[0]=0;
2507
         rgba[1]=0;
2508
         rgba[2]=0;
2509
         rgba[3]=0;*/
2510
         /* not enough pixels in resampling, resort to direct interpolation */
2511
         args.s = s[j];
2512
         args.t = t[j];
2513
         args.p = p[j];
2514
         min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
2515
         den = 1;
2516
         num[0] = rgba_temp[0][j];
2517
         num[1] = rgba_temp[1][j];
2518
         num[2] = rgba_temp[2][j];
2519
         num[3] = rgba_temp[3][j];
2520
      }
2521

2522
      rgba[0][j] = num[0] / den;
2523
      rgba[1][j] = num[1] / den;
2524
      rgba[2][j] = num[2] / den;
2525
      rgba[3][j] = num[3] / den;
2526
   }
2527
}
2528

2529

2530
/**
2531
 * Get mip level relative to base level for linear mip filter
2532
 */
2533
static void
2534
mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
2535
                           const struct sp_sampler *sp_samp,
2536
                           const float lod[TGSI_QUAD_SIZE],
2537
                           float level[TGSI_QUAD_SIZE])
2538
{
2539
   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
2540
}
2541

2542
/**
2543
 * Sample 2D texture using an anisotropic filter.
2544
 */
2545
static void
2546
mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
2547
                        const struct sp_sampler *sp_samp,
2548
                        img_filter_func min_filter,
2549
                        img_filter_func mag_filter,
2550
                        const float s[TGSI_QUAD_SIZE],
2551
                        const float t[TGSI_QUAD_SIZE],
2552
                        const float p[TGSI_QUAD_SIZE],
2553
                        UNUSED int gather_comp,
2554
                        const float lod_in[TGSI_QUAD_SIZE],
2555
                        const struct filter_args *filt_args,
2556
                        float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2557
{
2558
   const struct pipe_resource *texture = sp_sview->base.texture;
2559
   const struct pipe_sampler_view *psview = &sp_sview->base;
2560
   int level0;
2561
   float lambda;
2562
   float lod[TGSI_QUAD_SIZE];
2563

2564
   const float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
2565
   const float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
2566
   const float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2567
   const float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2568
   const float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2569
   const float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2570
   struct img_filter_args args;
2571

2572
   args.offset = filt_args->offset;
2573

2574
   if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
2575
       filt_args->control == TGSI_SAMPLER_LOD_NONE ||
2576
       /* XXX FIXME */
2577
       filt_args->control == TGSI_SAMPLER_DERIVS_EXPLICIT) {
2578
      /* note: instead of working with Px and Py, we will use the 
2579
       * squared length instead, to avoid sqrt.
2580
       */
2581
      const float Px2 = dudx * dudx + dvdx * dvdx;
2582
      const float Py2 = dudy * dudy + dvdy * dvdy;
2583

2584
      float Pmax2;
2585
      float Pmin2;
2586
      float e;
2587
      const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2588
      
2589
      if (Px2 < Py2) {
2590
         Pmax2 = Py2;
2591
         Pmin2 = Px2;
2592
      }
2593
      else {
2594
         Pmax2 = Px2;
2595
         Pmin2 = Py2;
2596
      }
2597
      
2598
      /* if the eccentricity of the ellipse is too big, scale up the shorter
2599
       * of the two vectors to limit the maximum amount of work per pixel
2600
       */
2601
      e = Pmax2 / Pmin2;
2602
      if (e > maxEccentricity) {
2603
         /* float s=e / maxEccentricity;
2604
            minor[0] *= s;
2605
            minor[1] *= s;
2606
            Pmin2 *= s; */
2607
         Pmin2 = Pmax2 / maxEccentricity;
2608
      }
2609
      
2610
      /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2611
       * this since 0.5*log(x) = log(sqrt(x))
2612
       */
2613
      lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2614
      compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
2615
   }
2616
   else {
2617
      assert(filt_args->control == TGSI_SAMPLER_LOD_EXPLICIT ||
2618
             filt_args->control == TGSI_SAMPLER_LOD_ZERO);
2619
      compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
2620
   }
2621
   
2622
   /* XXX: Take into account all lod values.
2623
    */
2624
   lambda = lod[0];
2625
   level0 = psview->u.tex.first_level + (int)lambda;
2626

2627
   /* If the ellipse covers the whole image, we can
2628
    * simply return the average of the whole image.
2629
    */
2630
   if (level0 >= (int) psview->u.tex.last_level) {
2631
      int j;
2632
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2633
         args.s = s[j];
2634
         args.t = t[j];
2635
         args.p = p[j];
2636
         args.level = psview->u.tex.last_level;
2637
         args.face_id = filt_args->faces[j];
2638
         /*
2639
          * XXX: we overwrote any linear filter with nearest, so this
2640
          * isn't right (albeit if last level is 1x1 and no border it
2641
          * will work just the same).
2642
          */
2643
         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2644
      }
2645
   }
2646
   else {
2647
      /* don't bother interpolating between multiple LODs; it doesn't
2648
       * seem to be worth the extra running time.
2649
       */
2650
      img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2651
                        s, t, p, filt_args->faces, filt_args->offset,
2652
                        level0, dudx, dvdx, dudy, dvdy, rgba);
2653
   }
2654

2655
   if (DEBUG_TEX) {
2656
      print_sample_4(__FUNCTION__, rgba);
2657
   }
2658
}
2659

2660
/**
2661
 * Get mip level relative to base level for linear mip filter
2662
 */
2663
static void
2664
mip_rel_level_linear_2d_linear_repeat_POT(
2665
   const struct sp_sampler_view *sp_sview,
2666
   const struct sp_sampler *sp_samp,
2667
   const float lod[TGSI_QUAD_SIZE],
2668
   float level[TGSI_QUAD_SIZE])
2669
{
2670
   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
2671
}
2672

2673
/**
2674
 * Specialized version of mip_filter_linear with hard-wired calls to
2675
 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2676
 */
2677
static void
2678
mip_filter_linear_2d_linear_repeat_POT(
2679
   const struct sp_sampler_view *sp_sview,
2680
   const struct sp_sampler *sp_samp,
2681
   img_filter_func min_filter,
2682
   img_filter_func mag_filter,
2683
   const float s[TGSI_QUAD_SIZE],
2684
   const float t[TGSI_QUAD_SIZE],
2685
   const float p[TGSI_QUAD_SIZE],
2686
   int gather_comp,
2687
   const float lod[TGSI_QUAD_SIZE],
2688
   const struct filter_args *filt_args,
2689
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2690
{
2691
   const struct pipe_sampler_view *psview = &sp_sview->base;
2692
   int j;
2693

2694
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2695
      const int level0 = psview->u.tex.first_level + (int)lod[j];
2696
      struct img_filter_args args;
2697
      /* Catches both negative and large values of level0:
2698
       */
2699
      args.s = s[j];
2700
      args.t = t[j];
2701
      args.p = p[j];
2702
      args.face_id = filt_args->faces[j];
2703
      args.offset = filt_args->offset;
2704
      args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
2705
      args.gather_comp = gather_comp;
2706
      if ((unsigned)level0 >= psview->u.tex.last_level) {
2707
         if (level0 < 0)
2708
            args.level = psview->u.tex.first_level;
2709
         else
2710
            args.level = psview->u.tex.last_level;
2711
         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
2712
                                         &rgba[0][j]);
2713

2714
      }
2715
      else {
2716
         const float levelBlend = frac(lod[j]);
2717
         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2718
         int c;
2719

2720
         args.level = level0;
2721
         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
2722
         args.level = level0+1;
2723
         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
2724

2725
         for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2726
            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2727
      }
2728
   }
2729

2730
   if (DEBUG_TEX) {
2731
      print_sample_4(__FUNCTION__, rgba);
2732
   }
2733
}
2734

2735
static const struct sp_filter_funcs funcs_linear = {
2736
   mip_rel_level_linear,
2737
   mip_filter_linear
2738
};
2739

2740
static const struct sp_filter_funcs funcs_nearest = {
2741
   mip_rel_level_nearest,
2742
   mip_filter_nearest
2743
};
2744

2745
static const struct sp_filter_funcs funcs_none = {
2746
   mip_rel_level_none,
2747
   mip_filter_none
2748
};
2749

2750
static const struct sp_filter_funcs funcs_none_no_filter_select = {
2751
   mip_rel_level_none_no_filter_select,
2752
   mip_filter_none_no_filter_select
2753
};
2754

2755
static const struct sp_filter_funcs funcs_linear_aniso = {
2756
   mip_rel_level_linear_aniso,
2757
   mip_filter_linear_aniso
2758
};
2759

2760
static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
2761
   mip_rel_level_linear_2d_linear_repeat_POT,
2762
   mip_filter_linear_2d_linear_repeat_POT
2763
};
2764

2765
/**
2766
 * Do shadow/depth comparisons.
2767
 */
2768
static void
2769
sample_compare(const struct sp_sampler_view *sp_sview,
2770
               const struct sp_sampler *sp_samp,
2771
               const float c0[TGSI_QUAD_SIZE],
2772
               enum tgsi_sampler_control control,
2773
               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2774
{
2775
   const struct pipe_sampler_state *sampler = &sp_samp->base;
2776
   int j, v;
2777
   int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2778
   float pc[4];
2779
   const struct util_format_description *format_desc =
2780
      util_format_description(sp_sview->base.format);
2781
   /* not entirely sure we couldn't end up with non-valid swizzle here */
2782
   const unsigned chan_type =
2783
      format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
2784
      format_desc->channel[format_desc->swizzle[0]].type :
2785
      UTIL_FORMAT_TYPE_FLOAT;
2786
   const bool is_gather = (control == TGSI_SAMPLER_GATHER);
2787

2788
   /**
2789
    * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2790
    * for 2D Array texture we need to use the 'c0' (aka Q).
2791
    * When we sampled the depth texture, the depth value was put into all
2792
    * RGBA channels.  We look at the red channel here.
2793
    */
2794

2795

2796

2797
   if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2798
      /*
2799
       * clamping is a result of conversion to texture format, hence
2800
       * doesn't happen with floats. Technically also should do comparison
2801
       * in texture format (quantization!).
2802
       */
2803
      pc[0] = CLAMP(c0[0], 0.0F, 1.0F);
2804
      pc[1] = CLAMP(c0[1], 0.0F, 1.0F);
2805
      pc[2] = CLAMP(c0[2], 0.0F, 1.0F);
2806
      pc[3] = CLAMP(c0[3], 0.0F, 1.0F);
2807
   } else {
2808
      pc[0] = c0[0];
2809
      pc[1] = c0[1];
2810
      pc[2] = c0[2];
2811
      pc[3] = c0[3];
2812
   }
2813

2814
   for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
2815
      /* compare four texcoords vs. four texture samples */
2816
      switch (sampler->compare_func) {
2817
      case PIPE_FUNC_LESS:
2818
         k[v][0] = pc[0] < rgba[v][0];
2819
         k[v][1] = pc[1] < rgba[v][1];
2820
         k[v][2] = pc[2] < rgba[v][2];
2821
         k[v][3] = pc[3] < rgba[v][3];
2822
         break;
2823
      case PIPE_FUNC_LEQUAL:
2824
         k[v][0] = pc[0] <= rgba[v][0];
2825
         k[v][1] = pc[1] <= rgba[v][1];
2826
         k[v][2] = pc[2] <= rgba[v][2];
2827
         k[v][3] = pc[3] <= rgba[v][3];
2828
         break;
2829
      case PIPE_FUNC_GREATER:
2830
         k[v][0] = pc[0] > rgba[v][0];
2831
         k[v][1] = pc[1] > rgba[v][1];
2832
         k[v][2] = pc[2] > rgba[v][2];
2833
         k[v][3] = pc[3] > rgba[v][3];
2834
         break;
2835
      case PIPE_FUNC_GEQUAL:
2836
         k[v][0] = pc[0] >= rgba[v][0];
2837
         k[v][1] = pc[1] >= rgba[v][1];
2838
         k[v][2] = pc[2] >= rgba[v][2];
2839
         k[v][3] = pc[3] >= rgba[v][3];
2840
         break;
2841
      case PIPE_FUNC_EQUAL:
2842
         k[v][0] = pc[0] == rgba[v][0];
2843
         k[v][1] = pc[1] == rgba[v][1];
2844
         k[v][2] = pc[2] == rgba[v][2];
2845
         k[v][3] = pc[3] == rgba[v][3];
2846
         break;
2847
      case PIPE_FUNC_NOTEQUAL:
2848
         k[v][0] = pc[0] != rgba[v][0];
2849
         k[v][1] = pc[1] != rgba[v][1];
2850
         k[v][2] = pc[2] != rgba[v][2];
2851
         k[v][3] = pc[3] != rgba[v][3];
2852
         break;
2853
      case PIPE_FUNC_ALWAYS:
2854
         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
2855
         break;
2856
      case PIPE_FUNC_NEVER:
2857
         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2858
         break;
2859
      default:
2860
         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2861
         assert(0);
2862
         break;
2863
      }
2864
   }
2865

2866
   if (is_gather) {
2867
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2868
         for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
2869
            rgba[v][j] = k[v][j];
2870
         }
2871
      }
2872
   } else {
2873
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2874
         rgba[0][j] = k[0][j];
2875
         rgba[1][j] = k[0][j];
2876
         rgba[2][j] = k[0][j];
2877
         rgba[3][j] = 1.0F;
2878
      }
2879
   }
2880
}
2881

2882
static void
2883
do_swizzling(const struct pipe_sampler_view *sview,
2884
             float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2885
             float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2886
{
2887
   struct sp_sampler_view *sp_sview = (struct sp_sampler_view *)sview;
2888
   int j;
2889
   const unsigned swizzle_r = sview->swizzle_r;
2890
   const unsigned swizzle_g = sview->swizzle_g;
2891
   const unsigned swizzle_b = sview->swizzle_b;
2892
   const unsigned swizzle_a = sview->swizzle_a;
2893

2894
   switch (swizzle_r) {
2895
   case PIPE_SWIZZLE_0:
2896
      for (j = 0; j < 4; j++)
2897
         out[0][j] = 0.0f;
2898
      break;
2899
   case PIPE_SWIZZLE_1:
2900
      for (j = 0; j < 4; j++)
2901
         out[0][j] = sp_sview->oneval;
2902
      break;
2903
   default:
2904
      assert(swizzle_r < 4);
2905
      for (j = 0; j < 4; j++)
2906
         out[0][j] = in[swizzle_r][j];
2907
   }
2908

2909
   switch (swizzle_g) {
2910
   case PIPE_SWIZZLE_0:
2911
      for (j = 0; j < 4; j++)
2912
         out[1][j] = 0.0f;
2913
      break;
2914
   case PIPE_SWIZZLE_1:
2915
      for (j = 0; j < 4; j++)
2916
         out[1][j] = sp_sview->oneval;
2917
      break;
2918
   default:
2919
      assert(swizzle_g < 4);
2920
      for (j = 0; j < 4; j++)
2921
         out[1][j] = in[swizzle_g][j];
2922
   }
2923

2924
   switch (swizzle_b) {
2925
   case PIPE_SWIZZLE_0:
2926
      for (j = 0; j < 4; j++)
2927
         out[2][j] = 0.0f;
2928
      break;
2929
   case PIPE_SWIZZLE_1:
2930
      for (j = 0; j < 4; j++)
2931
         out[2][j] = sp_sview->oneval;
2932
      break;
2933
   default:
2934
      assert(swizzle_b < 4);
2935
      for (j = 0; j < 4; j++)
2936
         out[2][j] = in[swizzle_b][j];
2937
   }
2938

2939
   switch (swizzle_a) {
2940
   case PIPE_SWIZZLE_0:
2941
      for (j = 0; j < 4; j++)
2942
         out[3][j] = 0.0f;
2943
      break;
2944
   case PIPE_SWIZZLE_1:
2945
      for (j = 0; j < 4; j++)
2946
         out[3][j] = sp_sview->oneval;
2947
      break;
2948
   default:
2949
      assert(swizzle_a < 4);
2950
      for (j = 0; j < 4; j++)
2951
         out[3][j] = in[swizzle_a][j];
2952
   }
2953
}
2954

2955

2956
static wrap_nearest_func
2957
get_nearest_unorm_wrap(unsigned mode)
2958
{
2959
   switch (mode) {
2960
   case PIPE_TEX_WRAP_CLAMP:
2961
      return wrap_nearest_unorm_clamp;
2962
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2963
      return wrap_nearest_unorm_clamp_to_edge;
2964
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2965
      return wrap_nearest_unorm_clamp_to_border;
2966
   default:
2967
      debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2968
      return wrap_nearest_unorm_clamp;
2969
   }
2970
}
2971

2972

2973
static wrap_nearest_func
2974
get_nearest_wrap(unsigned mode)
2975
{
2976
   switch (mode) {
2977
   case PIPE_TEX_WRAP_REPEAT:
2978
      return wrap_nearest_repeat;
2979
   case PIPE_TEX_WRAP_CLAMP:
2980
      return wrap_nearest_clamp;
2981
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2982
      return wrap_nearest_clamp_to_edge;
2983
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2984
      return wrap_nearest_clamp_to_border;
2985
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
2986
      return wrap_nearest_mirror_repeat;
2987
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
2988
      return wrap_nearest_mirror_clamp;
2989
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2990
      return wrap_nearest_mirror_clamp_to_edge;
2991
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2992
      return wrap_nearest_mirror_clamp_to_border;
2993
   default:
2994
      assert(0);
2995
      return wrap_nearest_repeat;
2996
   }
2997
}
2998

2999

3000
static wrap_linear_func
3001
get_linear_unorm_wrap(unsigned mode)
3002
{
3003
   switch (mode) {
3004
   case PIPE_TEX_WRAP_CLAMP:
3005
      return wrap_linear_unorm_clamp;
3006
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
3007
      return wrap_linear_unorm_clamp_to_edge;
3008
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
3009
      return wrap_linear_unorm_clamp_to_border;
3010
   default:
3011
      debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
3012
      return wrap_linear_unorm_clamp;
3013
   }
3014
}
3015

3016

3017
static wrap_linear_func
3018
get_linear_wrap(unsigned mode)
3019
{
3020
   switch (mode) {
3021
   case PIPE_TEX_WRAP_REPEAT:
3022
      return wrap_linear_repeat;
3023
   case PIPE_TEX_WRAP_CLAMP:
3024
      return wrap_linear_clamp;
3025
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
3026
      return wrap_linear_clamp_to_edge;
3027
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
3028
      return wrap_linear_clamp_to_border;
3029
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
3030
      return wrap_linear_mirror_repeat;
3031
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
3032
      return wrap_linear_mirror_clamp;
3033
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
3034
      return wrap_linear_mirror_clamp_to_edge;
3035
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
3036
      return wrap_linear_mirror_clamp_to_border;
3037
   default:
3038
      assert(0);
3039
      return wrap_linear_repeat;
3040
   }
3041
}
3042

3043

3044
/**
3045
 * Is swizzling needed for the given state key?
3046
 */
3047
static inline bool
3048
any_swizzle(const struct pipe_sampler_view *view)
3049
{
3050
   return (view->swizzle_r != PIPE_SWIZZLE_X ||
3051
           view->swizzle_g != PIPE_SWIZZLE_Y ||
3052
           view->swizzle_b != PIPE_SWIZZLE_Z ||
3053
           view->swizzle_a != PIPE_SWIZZLE_W);
3054
}
3055

3056

3057
static img_filter_func
3058
get_img_filter(const struct sp_sampler_view *sp_sview,
3059
               const struct pipe_sampler_state *sampler,
3060
               unsigned filter, bool gather)
3061
{
3062
   switch (sp_sview->base.target) {
3063
   case PIPE_BUFFER:
3064
   case PIPE_TEXTURE_1D:
3065
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3066
         return img_filter_1d_nearest;
3067
      else
3068
         return img_filter_1d_linear;
3069
      break;
3070
   case PIPE_TEXTURE_1D_ARRAY:
3071
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3072
         return img_filter_1d_array_nearest;
3073
      else
3074
         return img_filter_1d_array_linear;
3075
      break;
3076
   case PIPE_TEXTURE_2D:
3077
   case PIPE_TEXTURE_RECT:
3078
      /* Try for fast path:
3079
       */
3080
      if (!gather && sp_sview->pot2d &&
3081
          sampler->wrap_s == sampler->wrap_t &&
3082
          sampler->normalized_coords) 
3083
      {
3084
         switch (sampler->wrap_s) {
3085
         case PIPE_TEX_WRAP_REPEAT:
3086
            switch (filter) {
3087
            case PIPE_TEX_FILTER_NEAREST:
3088
               return img_filter_2d_nearest_repeat_POT;
3089
            case PIPE_TEX_FILTER_LINEAR:
3090
               return img_filter_2d_linear_repeat_POT;
3091
            default:
3092
               break;
3093
            }
3094
            break;
3095
         case PIPE_TEX_WRAP_CLAMP:
3096
            switch (filter) {
3097
            case PIPE_TEX_FILTER_NEAREST:
3098
               return img_filter_2d_nearest_clamp_POT;
3099
            default:
3100
               break;
3101
            }
3102
         }
3103
      }
3104
      /* Otherwise use default versions:
3105
       */
3106
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3107
         return img_filter_2d_nearest;
3108
      else
3109
         return img_filter_2d_linear;
3110
      break;
3111
   case PIPE_TEXTURE_2D_ARRAY:
3112
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3113
         return img_filter_2d_array_nearest;
3114
      else
3115
         return img_filter_2d_array_linear;
3116
      break;
3117
   case PIPE_TEXTURE_CUBE:
3118
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3119
         return img_filter_cube_nearest;
3120
      else
3121
         return img_filter_cube_linear;
3122
      break;
3123
   case PIPE_TEXTURE_CUBE_ARRAY:
3124
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3125
         return img_filter_cube_array_nearest;
3126
      else
3127
         return img_filter_cube_array_linear;
3128
      break;
3129
   case PIPE_TEXTURE_3D:
3130
      if (filter == PIPE_TEX_FILTER_NEAREST) 
3131
         return img_filter_3d_nearest;
3132
      else
3133
         return img_filter_3d_linear;
3134
      break;
3135
   default:
3136
      assert(0);
3137
      return img_filter_1d_nearest;
3138
   }
3139
}
3140

3141
/**
3142
 * Get mip filter funcs, and optionally both img min filter and img mag
3143
 * filter. Note that both img filter function pointers must be either non-NULL
3144
 * or NULL.
3145
 */
3146
static void
3147
get_filters(const struct sp_sampler_view *sp_sview,
3148
            const struct sp_sampler *sp_samp,
3149
            const enum tgsi_sampler_control control,
3150
            const struct sp_filter_funcs **funcs,
3151
            img_filter_func *min,
3152
            img_filter_func *mag)
3153
{
3154
   assert(funcs);
3155
   if (control == TGSI_SAMPLER_GATHER) {
3156
      *funcs = &funcs_nearest;
3157
      if (min) {
3158
         *min = get_img_filter(sp_sview, &sp_samp->base,
3159
                               PIPE_TEX_FILTER_LINEAR, true);
3160
      }
3161
   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
3162
      *funcs = &funcs_linear_2d_linear_repeat_POT;
3163
   } else {
3164
      *funcs = sp_samp->filter_funcs;
3165
      if (min) {
3166
         assert(mag);
3167
         *min = get_img_filter(sp_sview, &sp_samp->base,
3168
                               sp_samp->min_img_filter, false);
3169
         if (sp_samp->min_mag_equal) {
3170
            *mag = *min;
3171
         } else {
3172
            *mag = get_img_filter(sp_sview, &sp_samp->base,
3173
                                  sp_samp->base.mag_img_filter, false);
3174
         }
3175
      }
3176
   }
3177
}
3178

3179
static void
3180
sample_mip(const struct sp_sampler_view *sp_sview,
3181
           const struct sp_sampler *sp_samp,
3182
           const float s[TGSI_QUAD_SIZE],
3183
           const float t[TGSI_QUAD_SIZE],
3184
           const float p[TGSI_QUAD_SIZE],
3185
           const float c0[TGSI_QUAD_SIZE],
3186
           int gather_comp,
3187
           const float lod[TGSI_QUAD_SIZE],
3188
           const struct filter_args *filt_args,
3189
           float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3190
{
3191
   const struct sp_filter_funcs *funcs = NULL;
3192
   img_filter_func min_img_filter = NULL;
3193
   img_filter_func mag_img_filter = NULL;
3194

3195
   get_filters(sp_sview, sp_samp, filt_args->control,
3196
               &funcs, &min_img_filter, &mag_img_filter);
3197

3198
   funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
3199
                 s, t, p, gather_comp, lod, filt_args, rgba);
3200

3201
   if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
3202
      sample_compare(sp_sview, sp_samp, c0, filt_args->control, rgba);
3203
   }
3204

3205
   if (sp_sview->need_swizzle && filt_args->control != TGSI_SAMPLER_GATHER) {
3206
      float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3207
      memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3208
      do_swizzling(&sp_sview->base, rgba_temp, rgba);
3209
   }
3210

3211
}
3212

3213

3214
/**
3215
 * This function uses cube texture coordinates to choose a face of a cube and
3216
 * computes the 2D cube face coordinates. Puts face info into the sampler
3217
 * faces[] array.
3218
 */
3219
static void
3220
convert_cube(const struct sp_sampler_view *sp_sview,
3221
             const struct sp_sampler *sp_samp,
3222
             const float s[TGSI_QUAD_SIZE],
3223
             const float t[TGSI_QUAD_SIZE],
3224
             const float p[TGSI_QUAD_SIZE],
3225
             const float c0[TGSI_QUAD_SIZE],
3226
             float ssss[TGSI_QUAD_SIZE],
3227
             float tttt[TGSI_QUAD_SIZE],
3228
             float pppp[TGSI_QUAD_SIZE],
3229
             uint faces[TGSI_QUAD_SIZE])
3230
{
3231
   unsigned j;
3232

3233
   pppp[0] = c0[0];
3234
   pppp[1] = c0[1];
3235
   pppp[2] = c0[2];
3236
   pppp[3] = c0[3];
3237
   /*
3238
     major axis
3239
     direction    target                             sc     tc    ma
3240
     ----------   -------------------------------    ---    ---   ---
3241
     +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
3242
     -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
3243
     +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
3244
     -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
3245
     +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
3246
     -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
3247
   */
3248

3249
   /* Choose the cube face and compute new s/t coords for the 2D face.
3250
    *
3251
    * Use the same cube face for all four pixels in the quad.
3252
    *
3253
    * This isn't ideal, but if we want to use a different cube face
3254
    * per pixel in the quad, we'd have to also compute the per-face
3255
    * LOD here too.  That's because the four post-face-selection
3256
    * texcoords are no longer related to each other (they're
3257
    * per-face!)  so we can't use subtraction to compute the partial
3258
    * deriviates to compute the LOD.  Doing so (near cube edges
3259
    * anyway) gives us pretty much random values.
3260
    */
3261
   for (j = 0; j < TGSI_QUAD_SIZE; j++)  {
3262
      const float rx = s[j], ry = t[j], rz = p[j];
3263
      const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
3264

3265
      if (arx >= ary && arx >= arz) {
3266
         const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
3267
         const uint face = (rx >= 0.0F) ?
3268
            PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
3269
         const float ima = -0.5F / fabsf(s[j]);
3270
         ssss[j] = sign *  p[j] * ima + 0.5F;
3271
         tttt[j] =         t[j] * ima + 0.5F;
3272
         faces[j] = face;
3273
      }
3274
      else if (ary >= arx && ary >= arz) {
3275
         const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
3276
         const uint face = (ry >= 0.0F) ?
3277
            PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
3278
         const float ima = -0.5F / fabsf(t[j]);
3279
         ssss[j] =        -s[j] * ima + 0.5F;
3280
         tttt[j] = sign * -p[j] * ima + 0.5F;
3281
         faces[j] = face;
3282
      }
3283
      else {
3284
         const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
3285
         const uint face = (rz >= 0.0F) ?
3286
            PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
3287
         const float ima = -0.5F / fabsf(p[j]);
3288
         ssss[j] = sign * -s[j] * ima + 0.5F;
3289
         tttt[j] =         t[j] * ima + 0.5F;
3290
         faces[j] = face;
3291
      }
3292
   }
3293
}
3294

3295

3296
static void
3297
sp_get_dims(const struct sp_sampler_view *sp_sview,
3298
            int level,
3299
            int dims[4])
3300
{
3301
   const struct pipe_sampler_view *view = &sp_sview->base;
3302
   const struct pipe_resource *texture = view->texture;
3303

3304
   if (view->target == PIPE_BUFFER) {
3305
      dims[0] = view->u.buf.size / util_format_get_blocksize(view->format);
3306
      /* the other values are undefined, but let's avoid potential valgrind
3307
       * warnings.
3308
       */
3309
      dims[1] = dims[2] = dims[3] = 0;
3310
      return;
3311
   }
3312

3313
   /* undefined according to EXT_gpu_program */
3314
   level += view->u.tex.first_level;
3315
   if (level > view->u.tex.last_level)
3316
      return;
3317

3318
   dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
3319
   dims[0] = u_minify(texture->width0, level);
3320

3321
   switch (view->target) {
3322
   case PIPE_TEXTURE_1D_ARRAY:
3323
      dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3324
      FALLTHROUGH;
3325
   case PIPE_TEXTURE_1D:
3326
      return;
3327
   case PIPE_TEXTURE_2D_ARRAY:
3328
      dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3329
      FALLTHROUGH;
3330
   case PIPE_TEXTURE_2D:
3331
   case PIPE_TEXTURE_CUBE:
3332
   case PIPE_TEXTURE_RECT:
3333
      dims[1] = u_minify(texture->height0, level);
3334
      return;
3335
   case PIPE_TEXTURE_3D:
3336
      dims[1] = u_minify(texture->height0, level);
3337
      dims[2] = u_minify(texture->depth0, level);
3338
      return;
3339
   case PIPE_TEXTURE_CUBE_ARRAY:
3340
      dims[1] = u_minify(texture->height0, level);
3341
      dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
3342
      break;
3343
   default:
3344
      assert(!"unexpected texture target in sp_get_dims()");
3345
      return;
3346
   }
3347
}
3348

3349
/**
3350
 * This function is only used for getting unfiltered texels via the
3351
 * TXF opcode.  The GL spec says that out-of-bounds texel fetches
3352
 * produce undefined results.  Instead of crashing, lets just clamp
3353
 * coords to the texture image size.
3354
 */
3355
static void
3356
sp_get_texels(const struct sp_sampler_view *sp_sview,
3357
              const int v_i[TGSI_QUAD_SIZE],
3358
              const int v_j[TGSI_QUAD_SIZE],
3359
              const int v_k[TGSI_QUAD_SIZE],
3360
              const int lod[TGSI_QUAD_SIZE],
3361
              const int8_t offset[3],
3362
              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3363
{
3364
   union tex_tile_address addr;
3365
   const struct pipe_resource *texture = sp_sview->base.texture;
3366
   int j, c;
3367
   const float *tx;
3368
   /* TODO write a better test for LOD */
3369
   const unsigned level =
3370
      sp_sview->base.target == PIPE_BUFFER ? 0 :
3371
      CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
3372
            sp_sview->base.u.tex.first_level,
3373
            sp_sview->base.u.tex.last_level);
3374
   const int width = u_minify(texture->width0, level);
3375
   const int height = u_minify(texture->height0, level);
3376
   const int depth = u_minify(texture->depth0, level);
3377
   unsigned elem_size, first_element, last_element;
3378

3379
   addr.value = 0;
3380
   addr.bits.level = level;
3381

3382
   switch (sp_sview->base.target) {
3383
   case PIPE_BUFFER:
3384
      elem_size = util_format_get_blocksize(sp_sview->base.format);
3385
      first_element = sp_sview->base.u.buf.offset / elem_size;
3386
      last_element = (sp_sview->base.u.buf.offset +
3387
                      sp_sview->base.u.buf.size) / elem_size - 1;
3388
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3389
         const int x = CLAMP(v_i[j] + offset[0] +
3390
                             first_element,
3391
                             first_element,
3392
                             last_element);
3393
         tx = get_texel_buffer_no_border(sp_sview, addr, x, elem_size);
3394
         for (c = 0; c < 4; c++) {
3395
            rgba[c][j] = tx[c];
3396
         }
3397
      }
3398
      break;
3399
   case PIPE_TEXTURE_1D:
3400
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3401
         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3402
         tx = get_texel_2d_no_border(sp_sview, addr, x,
3403
                                     sp_sview->base.u.tex.first_layer);
3404
         for (c = 0; c < 4; c++) {
3405
            rgba[c][j] = tx[c];
3406
         }
3407
      }
3408
      break;
3409
   case PIPE_TEXTURE_1D_ARRAY:
3410
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3411
         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3412
         const int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
3413
                             sp_sview->base.u.tex.last_layer);
3414
         tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3415
         for (c = 0; c < 4; c++) {
3416
            rgba[c][j] = tx[c];
3417
         }
3418
      }
3419
      break;
3420
   case PIPE_TEXTURE_2D:
3421
   case PIPE_TEXTURE_RECT:
3422
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3423
         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3424
         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3425
         tx = get_texel_3d_no_border(sp_sview, addr, x, y,
3426
                                     sp_sview->base.u.tex.first_layer);
3427
         for (c = 0; c < 4; c++) {
3428
            rgba[c][j] = tx[c];
3429
         }
3430
      }
3431
      break;
3432
   case PIPE_TEXTURE_2D_ARRAY:
3433
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3434
         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3435
         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3436
         const int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
3437
                                 sp_sview->base.u.tex.last_layer);
3438
         tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3439
         for (c = 0; c < 4; c++) {
3440
            rgba[c][j] = tx[c];
3441
         }
3442
      }
3443
      break;
3444
   case PIPE_TEXTURE_3D:
3445
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3446
         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3447
         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3448
         int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3449
         tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3450
         for (c = 0; c < 4; c++) {
3451
            rgba[c][j] = tx[c];
3452
         }
3453
      }
3454
      break;
3455
   case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3456
   case PIPE_TEXTURE_CUBE_ARRAY:
3457
   default:
3458
      assert(!"Unknown or CUBE texture type in TXF processing\n");
3459
      break;
3460
   }
3461

3462
   if (sp_sview->need_swizzle) {
3463
      float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3464
      memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3465
      do_swizzling(&sp_sview->base, rgba_temp, rgba);
3466
   }
3467
}
3468

3469

3470
void *
3471
softpipe_create_sampler_state(struct pipe_context *pipe,
3472
                              const struct pipe_sampler_state *sampler)
3473
{
3474
   struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3475

3476
   samp->base = *sampler;
3477

3478
   /* Note that (for instance) linear_texcoord_s and
3479
    * nearest_texcoord_s may be active at the same time, if the
3480
    * sampler min_img_filter differs from its mag_img_filter.
3481
    */
3482
   if (sampler->normalized_coords) {
3483
      samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3484
      samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3485
      samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3486

3487
      samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3488
      samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3489
      samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3490
   }
3491
   else {
3492
      samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3493
      samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3494
      samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3495

3496
      samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3497
      samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3498
      samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3499
   }
3500

3501
   samp->min_img_filter = sampler->min_img_filter;
3502

3503
   switch (sampler->min_mip_filter) {
3504
   case PIPE_TEX_MIPFILTER_NONE:
3505
      if (sampler->min_img_filter == sampler->mag_img_filter)
3506
         samp->filter_funcs = &funcs_none_no_filter_select;
3507
      else
3508
         samp->filter_funcs = &funcs_none;
3509
      break;
3510

3511
   case PIPE_TEX_MIPFILTER_NEAREST:
3512
      samp->filter_funcs = &funcs_nearest;
3513
      break;
3514

3515
   case PIPE_TEX_MIPFILTER_LINEAR:
3516
      if (sampler->min_img_filter == sampler->mag_img_filter &&
3517
          sampler->normalized_coords &&
3518
          sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3519
          sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3520
          sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3521
          sampler->max_anisotropy <= 1) {
3522
         samp->min_mag_equal_repeat_linear = TRUE;
3523
      }
3524
      samp->filter_funcs = &funcs_linear;
3525

3526
      /* Anisotropic filtering extension. */
3527
      if (sampler->max_anisotropy > 1) {
3528
         samp->filter_funcs = &funcs_linear_aniso;
3529

3530
         /* Override min_img_filter:
3531
          * min_img_filter needs to be set to NEAREST since we need to access
3532
          * each texture pixel as it is and weight it later; using linear
3533
          * filters will have incorrect results.
3534
          * By setting the filter to NEAREST here, we can avoid calling the
3535
          * generic img_filter_2d_nearest in the anisotropic filter function,
3536
          * making it possible to use one of the accelerated implementations
3537
          */
3538
         samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3539

3540
         /* on first access create the lookup table containing the filter weights. */
3541
        if (!weightLut) {
3542
           create_filter_table();
3543
        }
3544
      }
3545
      break;
3546
   }
3547
   if (samp->min_img_filter == sampler->mag_img_filter) {
3548
      samp->min_mag_equal = TRUE;
3549
   }
3550

3551
   return (void *)samp;
3552
}
3553

3554

3555
compute_lambda_func
3556
softpipe_get_lambda_func(const struct pipe_sampler_view *view,
3557
                         enum pipe_shader_type shader)
3558
{
3559
   if (shader != PIPE_SHADER_FRAGMENT)
3560
      return compute_lambda_vert;
3561

3562
   switch (view->target) {
3563
   case PIPE_BUFFER:
3564
   case PIPE_TEXTURE_1D:
3565
   case PIPE_TEXTURE_1D_ARRAY:
3566
      return compute_lambda_1d;
3567
   case PIPE_TEXTURE_2D:
3568
   case PIPE_TEXTURE_2D_ARRAY:
3569
   case PIPE_TEXTURE_RECT:
3570
      return compute_lambda_2d;
3571
   case PIPE_TEXTURE_CUBE:
3572
   case PIPE_TEXTURE_CUBE_ARRAY:
3573
      return compute_lambda_cube;
3574
   case PIPE_TEXTURE_3D:
3575
      return compute_lambda_3d;
3576
   default:
3577
      assert(0);
3578
      return compute_lambda_1d;
3579
   }
3580
}
3581

3582

3583
struct pipe_sampler_view *
3584
softpipe_create_sampler_view(struct pipe_context *pipe,
3585
                             struct pipe_resource *resource,
3586
                             const struct pipe_sampler_view *templ)
3587
{
3588
   struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3589
   const struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3590

3591
   if (sview) {
3592
      struct pipe_sampler_view *view = &sview->base;
3593
      *view = *templ;
3594
      view->reference.count = 1;
3595
      view->texture = NULL;
3596
      pipe_resource_reference(&view->texture, resource);
3597
      view->context = pipe;
3598

3599
#ifdef DEBUG
3600
     /*
3601
      * This is possibly too lenient, but the primary reason is just
3602
      * to catch gallium frontends which forget to initialize this, so
3603
      * it only catches clearly impossible view targets.
3604
      */
3605
      if (view->target != resource->target) {
3606
         if (view->target == PIPE_TEXTURE_1D)
3607
            assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
3608
         else if (view->target == PIPE_TEXTURE_1D_ARRAY)
3609
            assert(resource->target == PIPE_TEXTURE_1D);
3610
         else if (view->target == PIPE_TEXTURE_2D)
3611
            assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
3612
                   resource->target == PIPE_TEXTURE_CUBE ||
3613
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3614
         else if (view->target == PIPE_TEXTURE_2D_ARRAY)
3615
            assert(resource->target == PIPE_TEXTURE_2D ||
3616
                   resource->target == PIPE_TEXTURE_CUBE ||
3617
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3618
         else if (view->target == PIPE_TEXTURE_CUBE)
3619
            assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
3620
                   resource->target == PIPE_TEXTURE_2D_ARRAY);
3621
         else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
3622
            assert(resource->target == PIPE_TEXTURE_CUBE ||
3623
                   resource->target == PIPE_TEXTURE_2D_ARRAY);
3624
         else
3625
            assert(0);
3626
      }
3627
#endif
3628

3629
      if (any_swizzle(view)) {
3630
         sview->need_swizzle = TRUE;
3631
      }
3632

3633
      sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
3634
                                  view->target == PIPE_TEXTURE_CUBE_ARRAY);
3635
      sview->pot2d = spr->pot &&
3636
                     (view->target == PIPE_TEXTURE_2D ||
3637
                      view->target == PIPE_TEXTURE_RECT);
3638

3639
      sview->xpot = util_logbase2( resource->width0 );
3640
      sview->ypot = util_logbase2( resource->height0 );
3641

3642
      sview->oneval = util_format_is_pure_integer(view->format) ? uif(1) : 1.0f;
3643
   }
3644

3645
   return (struct pipe_sampler_view *) sview;
3646
}
3647

3648

3649
/**
 * Const-preserving cast from the generic tgsi_sampler interface to the
 * softpipe implementation type.
 *
 * NOTE(review): presumably relies on the tgsi_sampler being embedded at
 * the start of struct sp_tgsi_sampler -- confirm in sp_tex_sample.h.
 */
static inline const struct sp_tgsi_sampler *
sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
{
   const struct sp_tgsi_sampler *sp_sampler =
      (const struct sp_tgsi_sampler *)sampler;

   return sp_sampler;
}
3654

3655

3656
static void
3657
sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3658
                 const unsigned sview_index,
3659
                 int level, int dims[4])
3660
{
3661
   const struct sp_tgsi_sampler *sp_samp =
3662
      sp_tgsi_sampler_cast_c(tgsi_sampler);
3663

3664
   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3665
   /* always have a view here but texture is NULL if no sampler view was set. */
3666
   if (!sp_samp->sp_sview[sview_index].base.texture) {
3667
      dims[0] = dims[1] = dims[2] = dims[3] = 0;
3668
      return;
3669
   }
3670
   sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3671
}
3672

3673

3674
static void prepare_compare_values(enum pipe_texture_target target,
3675
                                   const float p[TGSI_QUAD_SIZE],
3676
                                   const float c0[TGSI_QUAD_SIZE],
3677
                                   const float c1[TGSI_QUAD_SIZE],
3678
                                   float pc[TGSI_QUAD_SIZE])
3679
{
3680
   if (target == PIPE_TEXTURE_2D_ARRAY ||
3681
       target == PIPE_TEXTURE_CUBE) {
3682
      pc[0] = c0[0];
3683
      pc[1] = c0[1];
3684
      pc[2] = c0[2];
3685
      pc[3] = c0[3];
3686
   } else if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3687
      pc[0] = c1[0];
3688
      pc[1] = c1[1];
3689
      pc[2] = c1[2];
3690
      pc[3] = c1[3];
3691
   } else {
3692
      pc[0] = p[0];
3693
      pc[1] = p[1];
3694
      pc[2] = p[2];
3695
      pc[3] = p[3];
3696
   }
3697
}
3698

3699
static void
3700
sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3701
                    const unsigned sview_index,
3702
                    const unsigned sampler_index,
3703
                    const float s[TGSI_QUAD_SIZE],
3704
                    const float t[TGSI_QUAD_SIZE],
3705
                    const float p[TGSI_QUAD_SIZE],
3706
                    const float c0[TGSI_QUAD_SIZE],
3707
                    const float lod_in[TGSI_QUAD_SIZE],
3708
                    float derivs[3][2][TGSI_QUAD_SIZE],
3709
                    const int8_t offset[3],
3710
                    enum tgsi_sampler_control control,
3711
                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3712
{
3713
   const struct sp_tgsi_sampler *sp_tgsi_samp =
3714
      sp_tgsi_sampler_cast_c(tgsi_sampler);
3715
   struct sp_sampler_view sp_sview;
3716
   const struct sp_sampler *sp_samp;
3717
   struct filter_args filt_args;
3718
   float compare_values[TGSI_QUAD_SIZE];
3719
   float lod[TGSI_QUAD_SIZE];
3720
   int c;
3721

3722
   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3723
   assert(sampler_index < PIPE_MAX_SAMPLERS);
3724
   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
3725

3726
   memcpy(&sp_sview, &sp_tgsi_samp->sp_sview[sview_index],
3727
          sizeof(struct sp_sampler_view));
3728
   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
3729

3730
   if (util_format_is_unorm(sp_sview.base.format)) {
3731
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
3732
          sp_sview.border_color.f[c] = CLAMP(sp_samp->base.border_color.f[c],
3733
                                              0.0f, 1.0f);
3734
   } else if (util_format_is_snorm(sp_sview.base.format)) {
3735
      for (c = 0; c < TGSI_NUM_CHANNELS; c++)
3736
          sp_sview.border_color.f[c] = CLAMP(sp_samp->base.border_color.f[c],
3737
                                              -1.0f, 1.0f);
3738
   } else {
3739
      memcpy(sp_sview.border_color.f, sp_samp->base.border_color.f,
3740
             TGSI_NUM_CHANNELS * sizeof(float));
3741
   }
3742

3743
   /* always have a view here but texture is NULL if no sampler view was set. */
3744
   if (!sp_sview.base.texture) {
3745
      int i, j;
3746
      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3747
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3748
            rgba[j][i] = 0.0f;
3749
         }
3750
      }
3751
      return;
3752
   }
3753

3754
   if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE)
3755
      prepare_compare_values(sp_sview.base.target, p, c0, lod_in, compare_values);
3756

3757
   filt_args.control = control;
3758
   filt_args.offset = offset;
3759
   int gather_comp = get_gather_component(lod_in);
3760

3761
   compute_lambda_lod(&sp_sview, sp_samp, s, t, p, derivs, lod_in, control, lod);
3762

3763
   if (sp_sview.need_cube_convert) {
3764
      float cs[TGSI_QUAD_SIZE];
3765
      float ct[TGSI_QUAD_SIZE];
3766
      float cp[TGSI_QUAD_SIZE];
3767
      uint faces[TGSI_QUAD_SIZE];
3768

3769
      convert_cube(&sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
3770

3771
      filt_args.faces = faces;
3772
      sample_mip(&sp_sview, sp_samp, cs, ct, cp, compare_values, gather_comp, lod, &filt_args, rgba);
3773
   } else {
3774
      static const uint zero_faces[TGSI_QUAD_SIZE] = {0, 0, 0, 0};
3775

3776
      filt_args.faces = zero_faces;
3777
      sample_mip(&sp_sview, sp_samp, s, t, p, compare_values, gather_comp, lod, &filt_args, rgba);
3778
   }
3779
}
3780

3781
static void
3782
sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
3783
                  const unsigned sview_index,
3784
                  const unsigned sampler_index,
3785
                  const float s[TGSI_QUAD_SIZE],
3786
                  const float t[TGSI_QUAD_SIZE],
3787
                  const float p[TGSI_QUAD_SIZE],
3788
                  const float c0[TGSI_QUAD_SIZE],
3789
                  const enum tgsi_sampler_control control,
3790
                  float mipmap[TGSI_QUAD_SIZE],
3791
                  float lod[TGSI_QUAD_SIZE])
3792
{
3793
   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
3794
   static const float dummy_grad[3][2][TGSI_QUAD_SIZE];
3795

3796
   const struct sp_tgsi_sampler *sp_tgsi_samp =
3797
      sp_tgsi_sampler_cast_c(tgsi_sampler);
3798
   const struct sp_sampler_view *sp_sview;
3799
   const struct sp_sampler *sp_samp;
3800
   const struct sp_filter_funcs *funcs;
3801
   int i;
3802

3803
   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3804
   assert(sampler_index < PIPE_MAX_SAMPLERS);
3805
   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
3806

3807
   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
3808
   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
3809
   /* always have a view here but texture is NULL if no sampler view was
3810
    * set. */
3811
   if (!sp_sview->base.texture) {
3812
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3813
         mipmap[i] = 0.0f;
3814
         lod[i] = 0.0f;
3815
      }
3816
      return;
3817
   }
3818
   compute_lambda_lod_unclamped(sp_sview, sp_samp,
3819
                                s, t, p, dummy_grad, lod_in, control, lod);
3820

3821
   get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL);
3822
   funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
3823
}
3824

3825
static void
3826
sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3827
                  const unsigned sview_index,
3828
                  const int i[TGSI_QUAD_SIZE],
3829
                  const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3830
                  const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3831
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3832
{
3833
   const struct sp_tgsi_sampler *sp_samp =
3834
      sp_tgsi_sampler_cast_c(tgsi_sampler);
3835

3836
   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3837
   /* always have a view here but texture is NULL if no sampler view was set. */
3838
   if (!sp_samp->sp_sview[sview_index].base.texture) {
3839
      int i, j;
3840
      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3841
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3842
            rgba[j][i] = 0.0f;
3843
         }
3844
      }
3845
      return;
3846
   }
3847
   sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3848
}
3849

3850

3851
struct sp_tgsi_sampler *
3852
sp_create_tgsi_sampler(void)
3853
{
3854
   struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3855
   if (!samp)
3856
      return NULL;
3857

3858
   samp->base.get_dims = sp_tgsi_get_dims;
3859
   samp->base.get_samples = sp_tgsi_get_samples;
3860
   samp->base.get_texel = sp_tgsi_get_texel;
3861
   samp->base.query_lod = sp_tgsi_query_lod;
3862

3863
   return samp;
3864
}
3865

3866
Product

Resources

Company