CoCalc -- gregory

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/subdiv/gregory_patch.h
²¹³¹⁸ views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3

4
#pragma once
5

6
#include "catmullclark_patch.h"
7
#include "bezier_patch.h"
8
#include "bezier_curve.h"
9
#include "catmullclark_coefficients.h"
10

11
namespace embree
12
{  
13
  template<typename Vertex, typename Vertex_t = Vertex>
14
  class __aligned(64) GregoryPatchT
15
  {
16
    typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
17
    typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
18
    typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
19
    typedef BezierCurveT<Vertex> BezierCurve;
20

21
  public:
22
    Vertex v[4][4];
23
    Vertex f[2][2];
24

25
    __forceinline GregoryPatchT() {}
26

27
    __forceinline GregoryPatchT(const CatmullClarkPatch& patch) {
28
      init(patch);
29
    }
30

31
    __forceinline GregoryPatchT(const CatmullClarkPatch& patch, 
32
                                const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) 
33
    {
34
      init_crackfix(patch,border0,border1,border2,border3);
35
    }
36

37
    __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) { 
38
      init(CatmullClarkPatch(edge,vertices,stride));
39
    }
40
      
41
    __forceinline Vertex& p0() { return v[0][0]; }
42
    __forceinline Vertex& p1() { return v[0][3]; }
43
    __forceinline Vertex& p2() { return v[3][3]; }
44
    __forceinline Vertex& p3() { return v[3][0]; }
45
    
46
    __forceinline Vertex& e0_p() { return v[0][1]; }
47
    __forceinline Vertex& e0_m() { return v[1][0]; }
48
    __forceinline Vertex& e1_p() { return v[1][3]; }
49
    __forceinline Vertex& e1_m() { return v[0][2]; }
50
    __forceinline Vertex& e2_p() { return v[3][2]; }
51
    __forceinline Vertex& e2_m() { return v[2][3]; }
52
    __forceinline Vertex& e3_p() { return v[2][0]; }
53
    __forceinline Vertex& e3_m() { return v[3][1]; }
54
    
55
    __forceinline Vertex& f0_p() { return v[1][1]; }
56
    __forceinline Vertex& f1_p() { return v[1][2]; }
57
    __forceinline Vertex& f2_p() { return v[2][2]; }
58
    __forceinline Vertex& f3_p() { return v[2][1]; }
59
    __forceinline Vertex& f0_m() { return f[0][0]; }
60
    __forceinline Vertex& f1_m() { return f[0][1]; }
61
    __forceinline Vertex& f2_m() { return f[1][1]; }
62
    __forceinline Vertex& f3_m() { return f[1][0]; }
63
    
64
    __forceinline const Vertex& p0() const { return v[0][0]; }
65
    __forceinline const Vertex& p1() const { return v[0][3]; }
66
    __forceinline const Vertex& p2() const { return v[3][3]; }
67
    __forceinline const Vertex& p3() const { return v[3][0]; }
68
    
69
    __forceinline const Vertex& e0_p() const { return v[0][1]; }
70
    __forceinline const Vertex& e0_m() const { return v[1][0]; }
71
    __forceinline const Vertex& e1_p() const { return v[1][3]; }
72
    __forceinline const Vertex& e1_m() const { return v[0][2]; }
73
    __forceinline const Vertex& e2_p() const { return v[3][2]; }
74
    __forceinline const Vertex& e2_m() const { return v[2][3]; }
75
    __forceinline const Vertex& e3_p() const { return v[2][0]; }
76
    __forceinline const Vertex& e3_m() const { return v[3][1]; }
77
    
78
    __forceinline const Vertex& f0_p() const { return v[1][1]; }
79
    __forceinline const Vertex& f1_p() const { return v[1][2]; }
80
    __forceinline const Vertex& f2_p() const { return v[2][2]; }
81
    __forceinline const Vertex& f3_p() const { return v[2][1]; }
82
    __forceinline const Vertex& f0_m() const { return f[0][0]; }
83
    __forceinline const Vertex& f1_m() const { return f[0][1]; }
84
    __forceinline const Vertex& f2_m() const { return f[1][1]; }
85
    __forceinline const Vertex& f3_m() const { return f[1][0]; }
86
    
87
    __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) {
88
      return irreg_patch.ring[index].getLimitVertex();
89
    }
90
    
91
    __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
92
      return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx);
93
    }
94
    
95
    __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
96
      return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx);
97
    }
98

99
    __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) 
100
    {
101
      CatmullClark1Ring3fa r0,r1,r2;
102
      irreg_patch.ring[index].subdivide(r0);
103
      r0.subdivide(r1);
104
      r1.subdivide(r2);
105
      return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx);
106
    }
107
    
108
    __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) 
109
    {
110
      CatmullClark1Ring3fa r0,r1,r2;
111
      irreg_patch.ring[index].subdivide(r0);
112
      r0.subdivide(r1);
113
      r1.subdivide(r2);
114
      return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx);
115
    }
116
    
117
    void initFaceVertex(const CatmullClarkPatch& irreg_patch, 
118
			const size_t index, 
119
			const Vertex& p_vtx, 
120
                        const Vertex& e0_p_vtx, 
121
			const Vertex& e1_m_vtx, 
122
			const unsigned int face_valence_p1,
123
 			const Vertex& e0_m_vtx,	
124
			const Vertex& e3_p_vtx,	
125
			const unsigned int face_valence_p3,
126
			Vertex& f_p_vtx, 
127
			Vertex& f_m_vtx)
128
    {
129
      const unsigned int face_valence = irreg_patch.ring[index].face_valence;
130
      const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
131
      const unsigned int border_index = irreg_patch.ring[index].border_index;
132
      
133
      const Vertex& vtx     = irreg_patch.ring[index].vtx;
134
      const Vertex e_i      = irreg_patch.ring[index].getEdgeCenter(0);
135
      const Vertex c_i_m_1  = irreg_patch.ring[index].getQuadCenter(0);
136
      const Vertex e_i_m_1  = irreg_patch.ring[index].getEdgeCenter(1);
137
      
138
      Vertex c_i, e_i_p_1;
139
      const bool hasHardEdge0 =
140
        std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
141
        std::isinf(irreg_patch.ring[index].crease_weight[0]);
142
                
143
      if (unlikely((border_index == edge_valence-2) || hasHardEdge0))
144
      {
145
        /* mirror quad center and edge mid-point */
146
        c_i     = madd(2.0f, e_i - c_i_m_1, c_i_m_1);
147
        e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1);
148
      }
149
      else
150
      {
151
        c_i     = irreg_patch.ring[index].getQuadCenter( face_valence-1 );
152
        e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 );
153
      }
154
      
155
      Vertex c_i_m_2, e_i_m_2;
156
      const bool hasHardEdge1 =
157
        std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
158
        std::isinf(irreg_patch.ring[index].crease_weight[1]);
159
      
160
      if (unlikely(border_index == 2 || hasHardEdge1))
161
      {
162
        /* mirror quad center and edge mid-point */
163
        c_i_m_2  = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1);
164
        e_i_m_2  = madd(2.0f, vtx - e_i, + e_i);
165
      }
166
      else
167
      {
168
        c_i_m_2  = irreg_patch.ring[index].getQuadCenter( 1 );
169
        e_i_m_2  = irreg_patch.ring[index].getEdgeCenter( 2 );
170
      }      
171
      
172
      const float d = 3.0f;
173
      //const float c     = cosf(2.0f*M_PI/(float)face_valence);
174
      //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1);
175
      //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3);
176
      
177
      const float c     = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
178
      const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
179
      const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
180

181
      const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i);
182
      const Vertex r_e_m = 1.0f/3.0f * (e_i     - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2);
183

184
      f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);      
185
      f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);     
186
    }
187

188
    __noinline void init(const CatmullClarkPatch& patch)
189
    {
190
      assert( patch.ring[0].hasValidPositions() );
191
      assert( patch.ring[1].hasValidPositions() );
192
      assert( patch.ring[2].hasValidPositions() );
193
      assert( patch.ring[3].hasValidPositions() );
194
      
195
      p0() = initCornerVertex(patch,0);
196
      p1() = initCornerVertex(patch,1);
197
      p2() = initCornerVertex(patch,2);
198
      p3() = initCornerVertex(patch,3);
199

200
      e0_p() = initPositiveEdgeVertex(patch,0, p0());
201
      e1_p() = initPositiveEdgeVertex(patch,1, p1());
202
      e2_p() = initPositiveEdgeVertex(patch,2, p2());
203
      e3_p() = initPositiveEdgeVertex(patch,3, p3());
204

205
      e0_m() = initNegativeEdgeVertex(patch,0, p0());
206
      e1_m() = initNegativeEdgeVertex(patch,1, p1());
207
      e2_m() = initNegativeEdgeVertex(patch,2, p2());
208
      e3_m() = initNegativeEdgeVertex(patch,3, p3());
209

210
      const unsigned int face_valence_p0 = patch.ring[0].face_valence;
211
      const unsigned int face_valence_p1 = patch.ring[1].face_valence;
212
      const unsigned int face_valence_p2 = patch.ring[2].face_valence;
213
      const unsigned int face_valence_p3 = patch.ring[3].face_valence;
214
      
215
      initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
216
      initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
217
      initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
218
      initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
219

220
    }
221

222
    __noinline void init_crackfix(const CatmullClarkPatch& patch, 
223
                                  const BezierCurve* border0, 
224
                                  const BezierCurve* border1,
225
                                  const BezierCurve* border2, 
226
                                  const BezierCurve* border3)
227
    {
228
      assert( patch.ring[0].hasValidPositions() );
229
      assert( patch.ring[1].hasValidPositions() );
230
      assert( patch.ring[2].hasValidPositions() );
231
      assert( patch.ring[3].hasValidPositions() );
232
      
233
      p0() = initCornerVertex(patch,0);
234
      p1() = initCornerVertex(patch,1);
235
      p2() = initCornerVertex(patch,2);
236
      p3() = initCornerVertex(patch,3);
237

238
      e0_p() = initPositiveEdgeVertex(patch,0, p0());
239
      e1_p() = initPositiveEdgeVertex(patch,1, p1());
240
      e2_p() = initPositiveEdgeVertex(patch,2, p2());
241
      e3_p() = initPositiveEdgeVertex(patch,3, p3());
242

243
      e0_m() = initNegativeEdgeVertex(patch,0, p0());
244
      e1_m() = initNegativeEdgeVertex(patch,1, p1());
245
      e2_m() = initNegativeEdgeVertex(patch,2, p2());
246
      e3_m() = initNegativeEdgeVertex(patch,3, p3());
247

248
      if (unlikely(border0 != nullptr)) 
249
      {         
250
        p0()   = border0->v0;
251
        e0_p() = border0->v1; 
252
        e1_m() = border0->v2; 
253
        p1()   = border0->v3;
254
      }
255
      
256
      if (unlikely(border1 != nullptr))
257
      {          
258
        p1()   = border1->v0; 
259
        e1_p() = border1->v1; 
260
        e2_m() = border1->v2; 
261
        p2()   = border1->v3; 
262
      }
263

264
      if (unlikely(border2 != nullptr))
265
      {          
266
        p2()   = border2->v0; 
267
        e2_p() = border2->v1; 
268
        e3_m() = border2->v2; 
269
        p3()   = border2->v3; 
270
      }
271

272
      if (unlikely(border3 != nullptr))
273
      {          
274
        p3()   = border3->v0; 
275
        e3_p() = border3->v1; 
276
        e0_m() = border3->v2; 
277
        p0()   = border3->v3; 
278
      }
279

280
      const unsigned int face_valence_p0 = patch.ring[0].face_valence;
281
      const unsigned int face_valence_p1 = patch.ring[1].face_valence;
282
      const unsigned int face_valence_p2 = patch.ring[2].face_valence;
283
      const unsigned int face_valence_p3 = patch.ring[3].face_valence;
284
      
285
      initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
286
      initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
287
      initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
288
      initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
289
    }
290

291
    
292
    void computeGregoryPatchFacePoints(const unsigned int face_valence,
293
				       const Vertex& r_e_p, 
294
				       const Vertex& r_e_m, 					 
295
				       const Vertex& p_vtx, 
296
				       const Vertex& e0_p_vtx, 
297
				       const Vertex& e1_m_vtx, 
298
				       const unsigned int face_valence_p1,
299
				       const Vertex& e0_m_vtx,	
300
				       const Vertex& e3_p_vtx,	
301
				       const unsigned int face_valence_p3,
302
				       Vertex& f_p_vtx, 
303
				       Vertex& f_m_vtx,
304
                                       const float d = 3.0f)
305
    {
306
      //const float c     = cosf(2.0*M_PI/(float)face_valence);
307
      //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1);
308
      //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3);
309

310
      const float c     = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
311
      const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
312
      const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
313

314

315
      f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);      
316
      f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);      
317
      f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);      
318
      f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
319
    }
320

321
    __noinline void init(const GeneralCatmullClarkPatch& patch)
322
    {
323
      assert(patch.size() == 4);
324
#if 0
325
      CatmullClarkPatch qpatch; patch.init(qpatch);
326
      init(qpatch);
327
#else
328
      const float face_valence_p0 = patch.ring[0].face_valence;
329
      const float face_valence_p1 = patch.ring[1].face_valence;
330
      const float face_valence_p2 = patch.ring[2].face_valence;
331
      const float face_valence_p3 = patch.ring[3].face_valence;
332

333
      Vertex p0_r_p, p0_r_m;
334
      patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );
335

336
      Vertex p1_r_p, p1_r_m;
337
      patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );
338
      
339
      Vertex p2_r_p, p2_r_m;
340
      patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );
341

342
      Vertex p3_r_p, p3_r_m;
343
      patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );
344

345
      computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
346
      computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
347
      computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
348
      computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() );
349

350
#endif
351
    }
352
   
353
    
354
    __forceinline void convert_to_bezier()
355
    {
356
      f0_p() = (f0_p() + f0_m()) * 0.5f;
357
      f1_p() = (f1_p() + f1_m()) * 0.5f;
358
      f2_p() = (f2_p() + f2_m()) * 0.5f;
359
      f3_p() = (f3_p() + f3_m()) * 0.5f;
360
      f0_m() = Vertex( zero );
361
      f1_m() = Vertex( zero );
362
      f2_m() = Vertex( zero );
363
      f3_m() = Vertex( zero );      
364
    }
365
    
366
    static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv,
367
						   Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
368
    {
369
      if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f)) 
370
      {
371
	matrix_11 = matrix[1][1];
372
	matrix_12 = matrix[1][2];
373
	matrix_22 = matrix[2][2];
374
	matrix_21 = matrix[2][1];	 
375
      }
376
      else
377
      {
378
	const Vertex_t f0_p = matrix[1][1];
379
	const Vertex_t f1_p = matrix[1][2];
380
	const Vertex_t f2_p = matrix[2][2];
381
	const Vertex_t f3_p = matrix[2][1];
382
        
383
	const Vertex_t f0_m = f_m[0][0];
384
	const Vertex_t f1_m = f_m[0][1];
385
	const Vertex_t f2_m = f_m[1][1];
386
	const Vertex_t f3_m = f_m[1][0];
387
        
388
	matrix_11 = (      uu  * f0_p +       vv  * f0_m)*rcp(uu+vv);
389
	matrix_12 = ((1.0f-uu) * f1_m +       vv  * f1_p)*rcp(1.0f-uu+vv);
390
	matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv);
391
	matrix_21 = (      uu  * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv);
392
      }
393
    } 
394

395
    template<typename vfloat>
396
    static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2], 
397
                                                   size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) 
398
    {
399
      const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
400

401
      const vfloat f0_p = v[1][1][i];
402
      const vfloat f1_p = v[1][2][i];
403
      const vfloat f2_p = v[2][2][i];
404
      const vfloat f3_p = v[2][1][i];
405
      
406
      const vfloat f0_m = f[0][0][i];
407
      const vfloat f1_m = f[0][1][i];
408
      const vfloat f2_m = f[1][1][i];
409
      const vfloat f3_m = f[1][0][i];
410
      
411
      const vfloat one_minus_uu = vfloat(1.0f) - uu;
412
      const vfloat one_minus_vv = vfloat(1.0f) - vv;      
413
      
414
      const vfloat f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
415
      const vfloat f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
416
      const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
417
      const vfloat f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
418
      
419
      matrix_11 = select(m_border,f0_p,f0_i);
420
      matrix_12 = select(m_border,f1_p,f1_i);
421
      matrix_22 = select(m_border,f2_p,f2_i);
422
      matrix_21 = select(m_border,f3_p,f3_i);
423
    }
424

425
    static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv) 
426
    {
427
      Vertex_t v_11, v_12, v_22, v_21;
428
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
429
      
430
      const Vec4<float> Bu = BezierBasis::eval(uu);
431
      const Vec4<float> Bv = BezierBasis::eval(vv);
432
      
433
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
434
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
435
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
436
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
437
    }
438

439
    static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
440
    {
441
      Vertex_t v_11, v_12, v_22, v_21;
442
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
443
      
444
      const Vec4<float> Bu = BezierBasis::derivative(uu);
445
      const Vec4<float> Bv = BezierBasis::eval(vv);
446

447
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
448
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
449
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
450
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
451
    }
452

453
    static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
454
    {
455
      Vertex_t v_11, v_12, v_22, v_21;
456
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
457
      
458
      const Vec4<float> Bu = BezierBasis::eval(uu);
459
      const Vec4<float> Bv = BezierBasis::derivative(vv);
460
 
461
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
462
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
463
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
464
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
465
    }
466

467
    static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
468
    {
469
      Vertex_t v_11, v_12, v_22, v_21;
470
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
471
      
472
      const Vec4<float> Bu = BezierBasis::derivative2(uu);
473
      const Vec4<float> Bv = BezierBasis::eval(vv);
474
 
475
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
476
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
477
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
478
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
479
     }
480

481
    static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
482
    {
483
      Vertex_t v_11, v_12, v_22, v_21;
484
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
485
      
486
      const Vec4<float> Bu = BezierBasis::eval(uu);
487
      const Vec4<float> Bv = BezierBasis::derivative2(vv);
488

489
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
490
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
491
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
492
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
493
    }
494

495
    static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
496
    {
497
      Vertex_t v_11, v_12, v_22, v_21;
498
      computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
499
      
500
      const Vec4<float> Bu = BezierBasis::derivative(uu);
501
      const Vec4<float> Bv = BezierBasis::derivative(vv);
502

503
      return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 
504
                  madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))), 
505
                       madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))), 
506
                            Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 
507
    }
508

509
    __forceinline Vertex eval(const float uu, const float vv) const {
510
      return eval(v,f,uu,vv);
511
    }
512

513
    __forceinline Vertex eval_du( const float uu, const float vv) const {
514
      return eval_du(v,f,uu,vv);
515
    }
516

517
    __forceinline Vertex eval_dv( const float uu, const float vv) const {
518
      return eval_dv(v,f,uu,vv);
519
    }
520

521
    __forceinline Vertex eval_dudu( const float uu, const float vv) const {
522
      return eval_dudu(v,f,uu,vv);
523
    }
524

525
    __forceinline Vertex eval_dvdv( const float uu, const float vv) const {
526
      return eval_dvdv(v,f,uu,vv);
527
    }
528

529
    __forceinline Vertex eval_dudv( const float uu, const float vv) const {
530
      return eval_dudv(v,f,uu,vv);
531
    }
532

533
    static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv)  // FIXME: why not using basis functions
534
    {
535
      /* interpolate inner vertices */
536
      Vertex_t matrix_11, matrix_12, matrix_22, matrix_21;
537
      computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21);
538
      
539
      /* tangentU */
540
      const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]);
541
      const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11   , (Vertex_t)matrix_21   , (Vertex_t)matrix[3][1]);
542
      const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12   , (Vertex_t)matrix_22   , (Vertex_t)matrix[3][2]);
543
      const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]);
544
      
545
      const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
546
      
547
      /* tangentV */
548
      const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]);
549
      const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11   , (Vertex_t)matrix_12   , (Vertex_t)matrix[1][3]);
550
      const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21   , (Vertex_t)matrix_22   , (Vertex_t)matrix[2][3]);
551
      const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]);
552
      
553
      const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
554
      
555
      /* normal = tangentU x tangentV */
556
      const Vertex_t n = cross(tangentU,tangentV);
557
      
558
      return n;     
559
    }
560
   
561
    __forceinline Vertex normal( const float uu, const float vv) const {
562
      return normal(v,f,uu,vv);
563
    }    
564
    
565
    __forceinline void eval(const float u, const float v, 
566
                            Vertex* P, Vertex* dPdu, Vertex* dPdv, 
567
                            Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
568
                            const float dscale = 1.0f) const
569
    {
570
      if (P) {
571
        *P = eval(u,v); 
572
      }
573
      if (dPdu) {
574
        assert(dPdu); *dPdu = eval_du(u,v)*dscale; 
575
        assert(dPdv); *dPdv = eval_dv(u,v)*dscale; 
576
      }
577
      if (ddPdudu) {
578
        assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale); 
579
        assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale); 
580
        assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale); 
581
      }
582
    }
583

584
    template<class vfloat>
585
    static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2], 
586
                                     const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
587
                                     vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
588
    {
589
      const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
590
      const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i]))));
591
      const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i]))));
592
      const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
593
      return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
594
    }
595
    
596
    template<typename vbool, typename vfloat>
597
    static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2], 
598
                                   const vbool& valid, const vfloat& uu, const vfloat& vv, 
599
                                   float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
600
                                   const float dscale, const size_t dstride, const size_t N) 
601
    {
602
      if (P) {
603
        const Vec4<vfloat> u_n = BezierBasis::eval(uu); 
604
        const Vec4<vfloat> v_n = BezierBasis::eval(vv); 
605
        for (size_t i=0; i<N; i++) {
606
          vfloat matrix_11, matrix_12, matrix_22, matrix_21;
607
          computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
608
          vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
609
        }
610
      }
611
      if (dPdu)
612
      {
613
        {
614
          assert(dPdu);
615
          const Vec4<vfloat> u_n = BezierBasis::derivative(uu); 
616
          const Vec4<vfloat> v_n = BezierBasis::eval(vv);
617
          for (size_t i=0; i<N; i++) {
618
            vfloat matrix_11, matrix_12, matrix_22, matrix_21;
619
            computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
620
            vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
621
          }
622
        }
623
        {
624
          assert(dPdv);
625
          const Vec4<vfloat> u_n = BezierBasis::eval(uu); 
626
          const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
627
          for (size_t i=0; i<N; i++) {
628
            vfloat matrix_11, matrix_12, matrix_22, matrix_21;
629
            computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
630
            vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
631
          }
632
        }
633
      }
634
      if (ddPdudu)
635
      {
636
        {
637
          assert(ddPdudu);
638
          const Vec4<vfloat> u_n = BezierBasis::derivative2(uu); 
639
          const Vec4<vfloat> v_n = BezierBasis::eval(vv);
640
          for (size_t i=0; i<N; i++) {
641
            vfloat matrix_11, matrix_12, matrix_22, matrix_21;
642
            computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
643
            vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
644
          }
645
        }
646
        {
647
          assert(ddPdvdv);
648
          const Vec4<vfloat> u_n = BezierBasis::eval(uu); 
649
          const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
650
          for (size_t i=0; i<N; i++) {
651
            vfloat matrix_11, matrix_12, matrix_22, matrix_21;
652
            computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
653
            vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
654
          }
655
        }
656
        {
657
          assert(ddPdudv);
658
          const Vec4<vfloat> u_n = BezierBasis::derivative(uu); 
659
          const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
660
          for (size_t i=0; i<N; i++) {
661
            vfloat matrix_11, matrix_12, matrix_22, matrix_21;
662
            computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
663
            vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
664
          }
665
        }
666
      }
667
    }
668

669
    template<typename vbool, typename vfloat>
670
    __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, 
671
                            float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
672
                            const float dscale, const size_t dstride, const size_t N) const {
673
      eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
674
    }
675

676
    template<class T>
677
      static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) 
678
    {
679
      typedef typename T::Bool M;
680
      const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
681

682
      const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
683
      const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
684
      const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
685
      const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
686
      
687
      const Vec3<T> f0_m = f[0][0];
688
      const Vec3<T> f1_m = f[0][1];
689
      const Vec3<T> f2_m = f[1][1];
690
      const Vec3<T> f3_m = f[1][0];
691
      
692
      const T one_minus_uu = T(1.0f) - uu;
693
      const T one_minus_vv = T(1.0f) - vv;      
694
      
695
      const Vec3<T> f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
696
      const Vec3<T> f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
697
      const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
698
      const Vec3<T> f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
699
      
700
      const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
701
      const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
702
      const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
703
      const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
704

705
      const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
706
      const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
707
      const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
708
      const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
709
      const T B2_u = 3.0f * (uu * one_minus_uu * uu);
710
      const T B2_v = 3.0f * (vv * one_minus_vv * vv);
711
      const T B3_u = uu * uu * uu;
712
      const T B3_v = vv * vv * vv;
713

714
      const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))), 
715
                  madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x          ,madd(B2_u,F1.x          ,B3_u * matrix[1][3].x))), 
716
                  madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x          ,madd(B2_u,F2.x          ,B3_u * matrix[2][3].x))), 
717
                       B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x)))))); 
718

719
      const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))),
720
                  madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y          ,madd(B2_u,F1.y          ,B3_u * matrix[1][3].y))),
721
                  madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y          ,madd(B2_u,F2.y          ,B3_u * matrix[2][3].y))),
722
                       B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y))))));
723
      
724
      const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))),
725
                  madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z          ,madd(B2_u,F1.z          ,B3_u * matrix[1][3].z))),
726
                  madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z          ,madd(B2_u,F2.z          ,B3_u * matrix[2][3].z))),
727
                       B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z))))));
728
      
729
      return Vec3<T>(x,y,z);
730
    }
731

732
    template<class T>
733
    __forceinline Vec3<T> eval(const T& uu, const T& vv) const 
734
    {
735
      Vec3<T> ff[2][2];
736
      ff[0][0] = Vec3<T>(f[0][0]);
737
      ff[0][1] = Vec3<T>(f[0][1]);
738
      ff[1][1] = Vec3<T>(f[1][1]);
739
      ff[1][0] = Vec3<T>(f[1][0]);
740
      return eval_t(v,ff,uu,vv);
741
    }
742

743
    template<class T>
744
      static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) 
745
    {
746
      typedef typename T::Bool M;
747
      
748
      const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
749
      const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
750
      const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
751
      const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
752

753
      const Vec3<T> f0_m = f[0][0];
754
      const Vec3<T> f1_m = f[0][1];
755
      const Vec3<T> f2_m = f[1][1];
756
      const Vec3<T> f3_m = f[1][0];
757
      
758
      const T one_minus_uu = T(1.0f) - uu;
759
      const T one_minus_vv = T(1.0f) - vv;      
760
      
761
      const Vec3<T> f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
762
      const Vec3<T> f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
763
      const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
764
      const Vec3<T> f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
765

766
#if 1
767
      const M m_corner0 = (uu == 0.0f) & (vv == 0.0f);
768
      const M m_corner1 = (uu == 1.0f) & (vv == 0.0f);
769
      const M m_corner2 = (uu == 1.0f) & (vv == 1.0f);
770
      const M m_corner3 = (uu == 0.0f) & (vv == 1.0f);      
771
      const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
772
      const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
773
      const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
774
      const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
775
#else
776
      const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
777
      const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
778
      const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
779
      const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
780
      const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
781
#endif
782
      
783
      const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
784
      const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
785
      const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
786
      const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
787
      
788
      const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
789
      const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
790
      const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
791
      
792
      const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
793
      const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
794
      const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
795
      
796
      const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
797
      const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
798
      
799
      /* tangentU */
800
      const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
801
      const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
802
      const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
803
      const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
804
      
805
      const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
806
      
807
      /* tangentV */
808
      const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
809
      const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
810
      const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
811
      const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
812
      
813
      const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
814
      
815
      /* normal = tangentU x tangentV */
816
      const Vec3<T> n = cross(tangentU,tangentV);
817
      return n;
818
    }
819

820
     template<class T>
821
    __forceinline Vec3<T> normal(const T& uu, const T& vv) const 
822
    {
823
      Vec3<T> ff[2][2];
824
      ff[0][0] = Vec3<T>(f[0][0]);
825
      ff[0][1] = Vec3<T>(f[0][1]);
826
      ff[1][1] = Vec3<T>(f[1][1]);
827
      ff[1][0] = Vec3<T>(f[1][0]);
828
      return normal_t(v,ff,uu,vv);
829
    }
830

831
    __forceinline BBox<Vertex> bounds() const
832
    {
833
      const Vertex *const cv = &v[0][0];
834
      BBox<Vertex> bounds (cv[0]);
835
      for (size_t i=1; i<16; i++) 
836
        bounds.extend( cv[i] );
837
      bounds.extend(f[0][0]);
838
      bounds.extend(f[1][0]);
839
      bounds.extend(f[1][1]);
840
      bounds.extend(f[1][1]);
841
      return bounds;
842
    }
843
    
844
    friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p)
845
    {
846
      for (size_t y=0; y<4; y++)
847
	for (size_t x=0; x<4; x++)
848
	  o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
849
      
850
      for (size_t y=0; y<2; y++)
851
	for (size_t x=0; x<2; x++)
852
	  o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl;
853
      return o;
854
    } 
855
  };
856

857
  typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
858

859
  template<typename Vertex, typename Vertex_t>
860
    __forceinline  BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride) 
861
  {
862
    CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
863
    GregoryPatchT<Vertex,Vertex_t> gpatch(patch); 
864
    gpatch.convert_to_bezier(); 
865
    for (size_t y=0; y<4; y++)
866
      for (size_t x=0; x<4; x++)
867
        matrix[y][x] = (Vertex_t)gpatch.v[y][x];
868
  }
869
  
870
   template<typename Vertex, typename Vertex_t>
871
    __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch) 
872
    {
873
      GregoryPatchT<Vertex,Vertex_t> gpatch(patch); 
874
      gpatch.convert_to_bezier(); 
875
      for (size_t y=0; y<4; y++)
876
	for (size_t x=0; x<4; x++)
877
	  matrix[y][x] = (Vertex_t)gpatch.v[y][x];
878
    }
879

880
   template<typename Vertex, typename Vertex_t>
881
     __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch, 
882
                                                               const BezierCurveT<Vertex>* border0,
883
                                                               const BezierCurveT<Vertex>* border1,
884
                                                               const BezierCurveT<Vertex>* border2,
885
                                                               const BezierCurveT<Vertex>* border3) 
886
    {
887
      GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3); 
888
      gpatch.convert_to_bezier(); 
889
      for (size_t y=0; y<4; y++)
890
	for (size_t x=0; x<4; x++)
891
	  matrix[y][x] = (Vertex_t)gpatch.v[y][x];
892
    }
893
}
894

895
Product

Resources

Company