Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_opencl.cpp
9902 views
1
// basisu_opencl.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_opencl.h"
16
17
// If 1, the kernel source code will come from encoders/ocl_kernels.h. Otherwise, it will be read from the "ocl_kernels.cl" file in the current directory (for development).
18
#define BASISU_USE_OCL_KERNELS_HEADER (1)
19
#define BASISU_OCL_KERNELS_FILENAME "ocl_kernels.cl"
20
21
#if BASISU_SUPPORT_OPENCL
22
23
#include "basisu_enc.h"
24
25
// We only use OpenCL v1.2 or less.
26
#define CL_TARGET_OPENCL_VERSION 120
27
28
#ifdef __APPLE__
29
#include <OpenCL/opencl.h>
30
#else
31
#include <CL/cl.h>
32
#endif
33
34
#define BASISU_OPENCL_ASSERT_ON_ANY_ERRORS (1)
35
36
namespace basisu
37
{
38
#if BASISU_USE_OCL_KERNELS_HEADER
39
#include "basisu_ocl_kernels.h"
40
#endif
41
42
static void ocl_error_printf(const char* pFmt, ...)
43
{
44
va_list args;
45
va_start(args, pFmt);
46
error_vprintf(pFmt, args);
47
va_end(args);
48
49
#if BASISU_OPENCL_ASSERT_ON_ANY_ERRORS
50
assert(0);
51
#endif
52
}
53
54
class ocl
55
{
56
public:
57
ocl()
58
{
59
memset(&m_dev_fp_config, 0, sizeof(m_dev_fp_config));
60
61
m_ocl_mutex.lock();
62
m_ocl_mutex.unlock();
63
}
64
65
~ocl()
66
{
67
}
68
69
bool is_initialized() const { return m_device_id != nullptr; }
70
71
cl_device_id get_device_id() const { return m_device_id; }
72
cl_context get_context() const { return m_context; }
73
cl_command_queue get_command_queue() { return m_command_queue; }
74
cl_program get_program() const { return m_program; }
75
76
bool init(bool force_serialization)
77
{
78
deinit();
79
80
interval_timer tm;
81
tm.start();
82
83
cl_uint num_platforms = 0;
84
cl_int ret = clGetPlatformIDs(0, NULL, &num_platforms);
85
if (ret != CL_SUCCESS)
86
{
87
ocl_error_printf("ocl::init: clGetPlatformIDs() failed with %i\n", ret);
88
return false;
89
}
90
91
if ((!num_platforms) || (num_platforms > INT_MAX))
92
{
93
ocl_error_printf("ocl::init: clGetPlatformIDs() returned an invalid number of num_platforms\n");
94
return false;
95
}
96
97
std::vector<cl_platform_id> platforms(num_platforms);
98
99
ret = clGetPlatformIDs(num_platforms, platforms.data(), NULL);
100
if (ret != CL_SUCCESS)
101
{
102
ocl_error_printf("ocl::init: clGetPlatformIDs() failed\n");
103
return false;
104
}
105
106
cl_uint num_devices = 0;
107
ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &m_device_id, &num_devices);
108
109
if (ret == CL_DEVICE_NOT_FOUND)
110
{
111
ocl_error_printf("ocl::init: Couldn't get any GPU device ID's, trying CL_DEVICE_TYPE_CPU\n");
112
113
ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &m_device_id, &num_devices);
114
}
115
116
if (ret != CL_SUCCESS)
117
{
118
ocl_error_printf("ocl::init: Unable to get any device ID's\n");
119
120
m_device_id = nullptr;
121
return false;
122
}
123
124
ret = clGetDeviceInfo(m_device_id,
125
CL_DEVICE_SINGLE_FP_CONFIG,
126
sizeof(m_dev_fp_config),
127
&m_dev_fp_config,
128
nullptr);
129
if (ret != CL_SUCCESS)
130
{
131
ocl_error_printf("ocl::init: clGetDeviceInfo() failed\n");
132
return false;
133
}
134
135
char plat_vers[256];
136
size_t rv = 0;
137
ret = clGetPlatformInfo(platforms[0], CL_PLATFORM_VERSION, sizeof(plat_vers), plat_vers, &rv);
138
if (ret == CL_SUCCESS)
139
printf("OpenCL platform version: \"%s\"\n", plat_vers);
140
141
// Serialize CL calls with the AMD driver to avoid lockups when multiple command queues per thread are used. This sucks, but what can we do?
142
m_use_mutex = (strstr(plat_vers, "AMD") != nullptr) || force_serialization;
143
144
printf("Serializing OpenCL calls across threads: %u\n", (uint32_t)m_use_mutex);
145
146
m_context = clCreateContext(nullptr, 1, &m_device_id, nullptr, nullptr, &ret);
147
if (ret != CL_SUCCESS)
148
{
149
ocl_error_printf("ocl::init: clCreateContext() failed\n");
150
151
m_device_id = nullptr;
152
m_context = nullptr;
153
return false;
154
}
155
156
m_command_queue = clCreateCommandQueue(m_context, m_device_id, 0, &ret);
157
if (ret != CL_SUCCESS)
158
{
159
ocl_error_printf("ocl::init: clCreateCommandQueue() failed\n");
160
161
deinit();
162
return false;
163
}
164
165
printf("OpenCL init time: %3.3f secs\n", tm.get_elapsed_secs());
166
167
return true;
168
}
169
170
bool deinit()
171
{
172
if (m_program)
173
{
174
clReleaseProgram(m_program);
175
m_program = nullptr;
176
}
177
178
if (m_command_queue)
179
{
180
clReleaseCommandQueue(m_command_queue);
181
m_command_queue = nullptr;
182
}
183
184
if (m_context)
185
{
186
clReleaseContext(m_context);
187
m_context = nullptr;
188
}
189
190
m_device_id = nullptr;
191
192
return true;
193
}
194
195
cl_command_queue create_command_queue()
196
{
197
cl_serializer serializer(this);
198
199
cl_int ret = 0;
200
cl_command_queue p = clCreateCommandQueue(m_context, m_device_id, 0, &ret);
201
if (ret != CL_SUCCESS)
202
return nullptr;
203
204
return p;
205
}
206
207
void destroy_command_queue(cl_command_queue p)
208
{
209
if (p)
210
{
211
cl_serializer serializer(this);
212
213
clReleaseCommandQueue(p);
214
}
215
}
216
217
bool init_program(const char* pSrc, size_t src_size)
218
{
219
cl_int ret;
220
221
if (m_program != nullptr)
222
{
223
clReleaseProgram(m_program);
224
m_program = nullptr;
225
}
226
227
m_program = clCreateProgramWithSource(m_context, 1, (const char**)&pSrc, (const size_t*)&src_size, &ret);
228
if (ret != CL_SUCCESS)
229
{
230
ocl_error_printf("ocl::init_program: clCreateProgramWithSource() failed!\n");
231
return false;
232
}
233
234
std::string options;
235
if (m_dev_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
236
{
237
options += "-cl-fp32-correctly-rounded-divide-sqrt";
238
}
239
240
options += " -cl-std=CL1.2";
241
//options += " -cl-opt-disable";
242
//options += " -cl-mad-enable";
243
//options += " -cl-fast-relaxed-math";
244
245
ret = clBuildProgram(m_program, 1, &m_device_id,
246
options.size() ? options.c_str() : nullptr, // options
247
nullptr, // notify
248
nullptr); // user_data
249
250
if (ret != CL_SUCCESS)
251
{
252
const cl_int build_program_result = ret;
253
254
size_t ret_val_size;
255
ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
256
if (ret != CL_SUCCESS)
257
{
258
ocl_error_printf("ocl::init_program: clGetProgramBuildInfo() failed!\n");
259
return false;
260
}
261
262
std::vector<char> build_log(ret_val_size + 1);
263
264
ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log.data(), NULL);
265
266
ocl_error_printf("\nclBuildProgram() failed with error %i:\n%s", build_program_result, build_log.data());
267
268
return false;
269
}
270
271
return true;
272
}
273
274
cl_kernel create_kernel(const char* pName)
275
{
276
if (!m_program)
277
return nullptr;
278
279
cl_serializer serializer(this);
280
281
cl_int ret;
282
cl_kernel kernel = clCreateKernel(m_program, pName, &ret);
283
if (ret != CL_SUCCESS)
284
{
285
ocl_error_printf("ocl::create_kernel: clCreateKernel() failed!\n");
286
return nullptr;
287
}
288
289
return kernel;
290
}
291
292
bool destroy_kernel(cl_kernel k)
293
{
294
if (k)
295
{
296
cl_serializer serializer(this);
297
298
cl_int ret = clReleaseKernel(k);
299
if (ret != CL_SUCCESS)
300
{
301
ocl_error_printf("ocl::destroy_kernel: clReleaseKernel() failed!\n");
302
return false;
303
}
304
}
305
return true;
306
}
307
308
cl_mem alloc_read_buffer(size_t size)
309
{
310
cl_serializer serializer(this);
311
312
cl_int ret;
313
cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret);
314
if (ret != CL_SUCCESS)
315
{
316
ocl_error_printf("ocl::alloc_read_buffer: clCreateBuffer() failed!\n");
317
return nullptr;
318
}
319
320
return obj;
321
}
322
323
cl_mem alloc_and_init_read_buffer(cl_command_queue command_queue, const void *pInit, size_t size)
324
{
325
cl_serializer serializer(this);
326
327
cl_int ret;
328
cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret);
329
if (ret != CL_SUCCESS)
330
{
331
ocl_error_printf("ocl::alloc_and_init_read_buffer: clCreateBuffer() failed!\n");
332
return nullptr;
333
}
334
335
#if 0
336
if (!write_to_buffer(command_queue, obj, pInit, size))
337
{
338
destroy_buffer(obj);
339
return nullptr;
340
}
341
#else
342
ret = clEnqueueWriteBuffer(command_queue, obj, CL_TRUE, 0, size, pInit, 0, NULL, NULL);
343
if (ret != CL_SUCCESS)
344
{
345
ocl_error_printf("ocl::alloc_and_init_read_buffer: clEnqueueWriteBuffer() failed!\n");
346
return nullptr;
347
}
348
#endif
349
350
return obj;
351
}
352
353
cl_mem alloc_write_buffer(size_t size)
354
{
355
cl_serializer serializer(this);
356
357
cl_int ret;
358
cl_mem obj = clCreateBuffer(m_context, CL_MEM_WRITE_ONLY, size, NULL, &ret);
359
if (ret != CL_SUCCESS)
360
{
361
ocl_error_printf("ocl::alloc_write_buffer: clCreateBuffer() failed!\n");
362
return nullptr;
363
}
364
365
return obj;
366
}
367
368
bool destroy_buffer(cl_mem buf)
369
{
370
if (buf)
371
{
372
cl_serializer serializer(this);
373
374
cl_int ret = clReleaseMemObject(buf);
375
if (ret != CL_SUCCESS)
376
{
377
ocl_error_printf("ocl::destroy_buffer: clReleaseMemObject() failed!\n");
378
return false;
379
}
380
}
381
382
return true;
383
}
384
385
bool write_to_buffer(cl_command_queue command_queue, cl_mem clmem, const void* d, const size_t m)
386
{
387
cl_serializer serializer(this);
388
389
cl_int ret = clEnqueueWriteBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL);
390
if (ret != CL_SUCCESS)
391
{
392
ocl_error_printf("ocl::write_to_buffer: clEnqueueWriteBuffer() failed!\n");
393
return false;
394
}
395
396
return true;
397
}
398
399
bool read_from_buffer(cl_command_queue command_queue, const cl_mem clmem, void* d, size_t m)
400
{
401
cl_serializer serializer(this);
402
403
cl_int ret = clEnqueueReadBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL);
404
if (ret != CL_SUCCESS)
405
{
406
ocl_error_printf("ocl::read_from_buffer: clEnqueueReadBuffer() failed!\n");
407
return false;
408
}
409
410
return true;
411
}
412
413
cl_mem create_read_image_u8(uint32_t width, uint32_t height, const void* pPixels, uint32_t bytes_per_pixel, bool normalized)
414
{
415
cl_image_format fmt = get_image_format(bytes_per_pixel, normalized);
416
417
cl_image_desc desc;
418
memset(&desc, 0, sizeof(desc));
419
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
420
desc.image_width = width;
421
desc.image_height = height;
422
desc.image_row_pitch = width * bytes_per_pixel;
423
424
cl_serializer serializer(this);
425
426
cl_int ret;
427
cl_mem img = clCreateImage(m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &fmt, &desc, (void*)pPixels, &ret);
428
if (ret != CL_SUCCESS)
429
{
430
ocl_error_printf("ocl::create_read_image_u8: clCreateImage() failed!\n");
431
return nullptr;
432
}
433
434
return img;
435
}
436
437
cl_mem create_write_image_u8(uint32_t width, uint32_t height, uint32_t bytes_per_pixel, bool normalized)
438
{
439
cl_image_format fmt = get_image_format(bytes_per_pixel, normalized);
440
441
cl_image_desc desc;
442
memset(&desc, 0, sizeof(desc));
443
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
444
desc.image_width = width;
445
desc.image_height = height;
446
447
cl_serializer serializer(this);
448
449
cl_int ret;
450
cl_mem img = clCreateImage(m_context, CL_MEM_WRITE_ONLY, &fmt, &desc, nullptr, &ret);
451
if (ret != CL_SUCCESS)
452
{
453
ocl_error_printf("ocl::create_write_image_u8: clCreateImage() failed!\n");
454
return nullptr;
455
}
456
457
return img;
458
}
459
460
bool read_from_image(cl_command_queue command_queue, cl_mem img, void* pPixels, uint32_t ofs_x, uint32_t ofs_y, uint32_t width, uint32_t height)
461
{
462
cl_serializer serializer(this);
463
464
size_t origin[3] = { ofs_x, ofs_y, 0 }, region[3] = { width, height, 1 };
465
466
cl_int err = clEnqueueReadImage(command_queue, img, CL_TRUE, origin, region, 0, 0, pPixels, 0, NULL, NULL);
467
if (err != CL_SUCCESS)
468
{
469
ocl_error_printf("ocl::read_from_image: clEnqueueReadImage() failed!\n");
470
return false;
471
}
472
473
return true;
474
}
475
476
bool run_1D(cl_command_queue command_queue, const cl_kernel kernel, size_t num_items)
477
{
478
cl_serializer serializer(this);
479
480
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
481
1, // work_dim
482
nullptr, // global_work_offset
483
&num_items, // global_work_size
484
nullptr, // local_work_size
485
0, // num_events_in_wait_list
486
nullptr, // event_wait_list
487
nullptr // event
488
);
489
490
if (ret != CL_SUCCESS)
491
{
492
ocl_error_printf("ocl::run_1D: clEnqueueNDRangeKernel() failed!\n");
493
return false;
494
}
495
496
return true;
497
}
498
499
bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t width, size_t height)
500
{
501
cl_serializer serializer(this);
502
503
size_t num_global_items[2] = { width, height };
504
//size_t num_local_items[2] = { 1, 1 };
505
506
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
507
2, // work_dim
508
nullptr, // global_work_offset
509
num_global_items, // global_work_size
510
nullptr, // local_work_size
511
0, // num_events_in_wait_list
512
nullptr, // event_wait_list
513
nullptr // event
514
);
515
516
if (ret != CL_SUCCESS)
517
{
518
ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n");
519
return false;
520
}
521
522
return true;
523
}
524
525
bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t ofs_x, size_t ofs_y, size_t width, size_t height)
526
{
527
cl_serializer serializer(this);
528
529
size_t global_ofs[2] = { ofs_x, ofs_y };
530
size_t num_global_items[2] = { width, height };
531
//size_t num_local_items[2] = { 1, 1 };
532
533
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
534
2, // work_dim
535
global_ofs, // global_work_offset
536
num_global_items, // global_work_size
537
nullptr, // local_work_size
538
0, // num_events_in_wait_list
539
nullptr, // event_wait_list
540
nullptr // event
541
);
542
543
if (ret != CL_SUCCESS)
544
{
545
ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n");
546
return false;
547
}
548
549
return true;
550
}
551
552
void flush(cl_command_queue command_queue)
553
{
554
cl_serializer serializer(this);
555
556
clFlush(command_queue);
557
clFinish(command_queue);
558
}
559
560
template<typename T>
561
bool set_kernel_arg(cl_kernel kernel, uint32_t index, const T& obj)
562
{
563
cl_serializer serializer(this);
564
565
cl_int ret = clSetKernelArg(kernel, index, sizeof(T), (void*)&obj);
566
if (ret != CL_SUCCESS)
567
{
568
ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n");
569
return false;
570
}
571
return true;
572
}
573
574
template<typename T>
575
bool set_kernel_args(cl_kernel kernel, const T& obj1)
576
{
577
cl_serializer serializer(this);
578
579
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1);
580
if (ret != CL_SUCCESS)
581
{
582
ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n");
583
return false;
584
}
585
return true;
586
}
587
588
#define BASISU_CHECK_ERR if (ret != CL_SUCCESS) { ocl_error_printf("ocl::set_kernel_args: clSetKernelArg() failed!\n"); return false; }
589
590
template<typename T, typename U>
591
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2)
592
{
593
cl_serializer serializer(this);
594
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
595
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
596
return true;
597
}
598
599
template<typename T, typename U, typename V>
600
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3)
601
{
602
cl_serializer serializer(this);
603
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
604
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
605
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
606
return true;
607
}
608
609
template<typename T, typename U, typename V, typename W>
610
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4)
611
{
612
cl_serializer serializer(this);
613
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
614
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
615
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
616
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
617
return true;
618
}
619
620
template<typename T, typename U, typename V, typename W, typename X>
621
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5)
622
{
623
cl_serializer serializer(this);
624
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
625
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
626
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
627
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
628
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
629
return true;
630
}
631
632
template<typename T, typename U, typename V, typename W, typename X, typename Y>
633
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6)
634
{
635
cl_serializer serializer(this);
636
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
637
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
638
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
639
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
640
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
641
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
642
return true;
643
}
644
645
template<typename T, typename U, typename V, typename W, typename X, typename Y, typename Z>
646
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7)
647
{
648
cl_serializer serializer(this);
649
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
650
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
651
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
652
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
653
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
654
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
655
ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR
656
return true;
657
}
658
659
template<typename T, typename U, typename V, typename W, typename X, typename Y, typename Z, typename A>
660
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7, const A& obj8)
661
{
662
cl_serializer serializer(this);
663
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
664
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
665
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
666
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
667
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
668
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
669
ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR
670
ret = clSetKernelArg(kernel, 7, sizeof(A), (void*)&obj8); BASISU_CHECK_ERR
671
return true;
672
}
673
#undef BASISU_CHECK_ERR
674
675
private:
676
cl_device_id m_device_id = nullptr;
677
cl_context m_context = nullptr;
678
cl_command_queue m_command_queue = nullptr;
679
cl_program m_program = nullptr;
680
cl_device_fp_config m_dev_fp_config;
681
682
bool m_use_mutex = false;
683
std::mutex m_ocl_mutex;
684
685
// This helper object is used to optionally serialize all calls to the CL driver after initialization.
686
// Currently this is only used to work around race conditions in the Windows AMD driver.
687
struct cl_serializer
688
{
689
inline cl_serializer(const cl_serializer&);
690
cl_serializer& operator= (const cl_serializer&);
691
692
inline cl_serializer(ocl *p) : m_p(p)
693
{
694
if (m_p->m_use_mutex)
695
m_p->m_ocl_mutex.lock();
696
}
697
698
inline ~cl_serializer()
699
{
700
if (m_p->m_use_mutex)
701
m_p->m_ocl_mutex.unlock();
702
}
703
704
private:
705
ocl* m_p;
706
};
707
708
cl_image_format get_image_format(uint32_t bytes_per_pixel, bool normalized)
709
{
710
cl_image_format fmt;
711
switch (bytes_per_pixel)
712
{
713
case 1: fmt.image_channel_order = CL_LUMINANCE; break;
714
case 2: fmt.image_channel_order = CL_RG; break;
715
case 3: fmt.image_channel_order = CL_RGB; break;
716
case 4: fmt.image_channel_order = CL_RGBA; break;
717
default: assert(0); fmt.image_channel_order = CL_LUMINANCE; break;
718
}
719
720
fmt.image_channel_data_type = normalized ? CL_UNORM_INT8 : CL_UNSIGNED_INT8;
721
return fmt;
722
}
723
};
724
725
// Library blobal state
726
ocl g_ocl;
727
728
bool opencl_init(bool force_serialization)
729
{
730
if (g_ocl.is_initialized())
731
{
732
assert(0);
733
return false;
734
}
735
736
if (!g_ocl.init(force_serialization))
737
{
738
ocl_error_printf("opencl_init: Failed initializing OpenCL\n");
739
return false;
740
}
741
742
const char* pKernel_src = nullptr;
743
size_t kernel_src_size = 0;
744
uint8_vec kernel_src;
745
746
#if BASISU_USE_OCL_KERNELS_HEADER
747
pKernel_src = reinterpret_cast<const char*>(ocl_kernels_cl);
748
kernel_src_size = ocl_kernels_cl_len;
749
#else
750
if (!read_file_to_vec(BASISU_OCL_KERNELS_FILENAME, kernel_src))
751
{
752
ocl_error_printf("opencl_init: Cannot read OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME);
753
g_ocl.deinit();
754
return false;
755
}
756
757
pKernel_src = (char*)kernel_src.data();
758
kernel_src_size = kernel_src.size();
759
#endif
760
761
if (!kernel_src_size)
762
{
763
ocl_error_printf("opencl_init: Invalid OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME);
764
g_ocl.deinit();
765
return false;
766
}
767
768
if (!g_ocl.init_program(pKernel_src, kernel_src_size))
769
{
770
ocl_error_printf("opencl_init: Failed compiling OpenCL program\n");
771
g_ocl.deinit();
772
return false;
773
}
774
775
printf("OpenCL support initialized successfully\n");
776
777
return true;
778
}
779
780
void opencl_deinit()
781
{
782
g_ocl.deinit();
783
}
784
785
bool opencl_is_available()
786
{
787
return g_ocl.is_initialized();
788
}
789
790
struct opencl_context
791
{
792
size_t m_ocl_total_pixel_blocks;
793
cl_mem m_ocl_pixel_blocks;
794
795
cl_command_queue m_command_queue;
796
797
cl_kernel m_ocl_encode_etc1s_blocks_kernel;
798
cl_kernel m_ocl_refine_endpoint_clusterization_kernel;
799
cl_kernel m_ocl_encode_etc1s_from_pixel_cluster_kernel;
800
cl_kernel m_ocl_find_optimal_selector_clusters_for_each_block_kernel;
801
cl_kernel m_ocl_determine_selectors_kernel;
802
};
803
804
opencl_context_ptr opencl_create_context()
805
{
806
if (!opencl_is_available())
807
{
808
ocl_error_printf("opencl_create_context: OpenCL not initialized\n");
809
assert(0);
810
return nullptr;
811
}
812
813
interval_timer tm;
814
tm.start();
815
816
opencl_context* pContext = static_cast<opencl_context * >(calloc(sizeof(opencl_context), 1));
817
if (!pContext)
818
return nullptr;
819
820
// To avoid driver bugs in some drivers - serialize this. Likely not necessary, we don't know.
821
// https://community.intel.com/t5/OpenCL-for-CPU/Bug-report-clCreateKernelsInProgram-is-not-thread-safe/td-p/1159771
822
823
pContext->m_command_queue = g_ocl.create_command_queue();
824
if (!pContext->m_command_queue)
825
{
826
ocl_error_printf("opencl_create_context: Failed creating OpenCL command queue!\n");
827
opencl_destroy_context(pContext);
828
return nullptr;
829
}
830
831
pContext->m_ocl_encode_etc1s_blocks_kernel = g_ocl.create_kernel("encode_etc1s_blocks");
832
if (!pContext->m_ocl_encode_etc1s_blocks_kernel)
833
{
834
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_block\n");
835
opencl_destroy_context(pContext);
836
return nullptr;
837
}
838
839
pContext->m_ocl_refine_endpoint_clusterization_kernel = g_ocl.create_kernel("refine_endpoint_clusterization");
840
if (!pContext->m_ocl_refine_endpoint_clusterization_kernel)
841
{
842
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel refine_endpoint_clusterization\n");
843
opencl_destroy_context(pContext);
844
return nullptr;
845
}
846
847
pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel = g_ocl.create_kernel("encode_etc1s_from_pixel_cluster");
848
if (!pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel)
849
{
850
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_from_pixel_cluster\n");
851
opencl_destroy_context(pContext);
852
return nullptr;
853
}
854
855
pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel = g_ocl.create_kernel("find_optimal_selector_clusters_for_each_block");
856
if (!pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel)
857
{
858
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel find_optimal_selector_clusters_for_each_block\n");
859
opencl_destroy_context(pContext);
860
return nullptr;
861
}
862
863
pContext->m_ocl_determine_selectors_kernel = g_ocl.create_kernel("determine_selectors");
864
if (!pContext->m_ocl_determine_selectors_kernel)
865
{
866
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel determine_selectors\n");
867
opencl_destroy_context(pContext);
868
return nullptr;
869
}
870
871
debug_printf("opencl_create_context: Elapsed time: %f secs\n", tm.get_elapsed_secs());
872
873
return pContext;
874
}
875
876
void opencl_destroy_context(opencl_context_ptr pContext)
877
{
878
if (!pContext)
879
return;
880
881
interval_timer tm;
882
tm.start();
883
884
g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks);
885
886
g_ocl.destroy_kernel(pContext->m_ocl_determine_selectors_kernel);
887
g_ocl.destroy_kernel(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel);
888
g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel);
889
g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_blocks_kernel);
890
g_ocl.destroy_kernel(pContext->m_ocl_refine_endpoint_clusterization_kernel);
891
892
g_ocl.destroy_command_queue(pContext->m_command_queue);
893
894
memset(pContext, 0, sizeof(opencl_context));
895
896
free(pContext);
897
898
debug_printf("opencl_destroy_context: Elapsed time: %f secs\n", tm.get_elapsed_secs());
899
}
900
901
#pragma pack(push, 1)
902
struct cl_encode_etc1s_param_struct
903
{
904
int m_total_blocks;
905
int m_perceptual;
906
int m_total_perms;
907
};
908
#pragma pack(pop)
909
910
bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
911
{
912
if (!opencl_is_available())
913
return false;
914
915
if (pContext->m_ocl_pixel_blocks)
916
{
917
g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks);
918
pContext->m_ocl_pixel_blocks = nullptr;
919
}
920
921
pContext->m_ocl_pixel_blocks = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_blocks, sizeof(cl_pixel_block) * total_blocks);
922
if (!pContext->m_ocl_pixel_blocks)
923
return false;
924
925
pContext->m_ocl_total_pixel_blocks = total_blocks;
926
927
return true;
928
}
929
930
bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms)
931
{
932
if (!opencl_is_available())
933
return false;
934
935
interval_timer tm;
936
tm.start();
937
938
assert(pContext->m_ocl_pixel_blocks);
939
if (!pContext->m_ocl_pixel_blocks)
940
return false;
941
942
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
943
944
cl_encode_etc1s_param_struct ps;
945
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
946
ps.m_perceptual = perceptual;
947
ps.m_total_perms = total_perms;
948
949
bool status = false;
950
951
cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps));
952
cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks);
953
954
if (!vars || !block_buf)
955
goto exit;
956
957
if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_blocks_kernel, vars, pContext->m_ocl_pixel_blocks, block_buf))
958
goto exit;
959
960
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_blocks_kernel, pContext->m_ocl_total_pixel_blocks, 1))
961
goto exit;
962
963
if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, pContext->m_ocl_total_pixel_blocks * sizeof(etc_block)))
964
goto exit;
965
966
status = true;
967
968
debug_printf("opencl_encode_etc1s_blocks: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
969
970
exit:
971
g_ocl.destroy_buffer(block_buf);
972
g_ocl.destroy_buffer(vars);
973
974
return status;
975
}
976
977
bool opencl_encode_etc1s_pixel_clusters(
978
opencl_context_ptr pContext,
979
etc_block* pOutput_blocks,
980
uint32_t total_clusters,
981
const cl_pixel_cluster* pClusters,
982
uint64_t total_pixels,
983
const color_rgba* pPixels, const uint32_t* pPixel_weights,
984
bool perceptual, uint32_t total_perms)
985
{
986
if (!opencl_is_available())
987
return false;
988
989
interval_timer tm;
990
tm.start();
991
992
cl_encode_etc1s_param_struct ps;
993
ps.m_total_blocks = total_clusters;
994
ps.m_perceptual = perceptual;
995
ps.m_total_perms = total_perms;
996
997
bool status = false;
998
999
if (sizeof(size_t) == sizeof(uint32_t))
1000
{
1001
if ( ((sizeof(cl_pixel_cluster) * total_clusters) > UINT32_MAX) ||
1002
((sizeof(color_rgba) * total_pixels) > UINT32_MAX) ||
1003
((sizeof(uint32_t) * total_pixels) > UINT32_MAX) )
1004
{
1005
return false;
1006
}
1007
}
1008
1009
cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps));
1010
cl_mem input_clusters = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pClusters, (size_t)(sizeof(cl_pixel_cluster) * total_clusters));
1011
cl_mem input_pixels = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixels, (size_t)(sizeof(color_rgba) * total_pixels));
1012
cl_mem weights_buf = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_weights, (size_t)(sizeof(uint32_t) * total_pixels));
1013
cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * total_clusters);
1014
1015
if (!vars || !input_clusters || !input_pixels || !weights_buf || !block_buf)
1016
goto exit;
1017
1018
if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, vars, input_clusters, input_pixels, weights_buf, block_buf))
1019
goto exit;
1020
1021
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, total_clusters, 1))
1022
goto exit;
1023
1024
if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, sizeof(etc_block) * total_clusters))
1025
goto exit;
1026
1027
status = true;
1028
1029
debug_printf("opencl_encode_etc1s_pixel_clusters: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1030
1031
exit:
1032
g_ocl.destroy_buffer(block_buf);
1033
g_ocl.destroy_buffer(weights_buf);
1034
g_ocl.destroy_buffer(input_pixels);
1035
g_ocl.destroy_buffer(input_clusters);
1036
g_ocl.destroy_buffer(vars);
1037
1038
return status;
1039
}
1040
1041
#pragma pack(push, 1)
1042
struct cl_rec_param_struct
1043
{
1044
int m_total_blocks;
1045
int m_perceptual;
1046
};
1047
#pragma pack(pop)
1048
1049
bool opencl_refine_endpoint_clusterization(
1050
opencl_context_ptr pContext,
1051
const cl_block_info_struct* pPixel_block_info,
1052
uint32_t total_clusters,
1053
const cl_endpoint_cluster_struct* pCluster_info,
1054
const uint32_t* pSorted_block_indices,
1055
uint32_t* pOutput_cluster_indices,
1056
bool perceptual)
1057
{
1058
if (!opencl_is_available())
1059
return false;
1060
1061
interval_timer tm;
1062
tm.start();
1063
1064
assert(pContext->m_ocl_pixel_blocks);
1065
if (!pContext->m_ocl_pixel_blocks)
1066
return false;
1067
1068
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1069
1070
cl_rec_param_struct ps;
1071
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1072
ps.m_perceptual = perceptual;
1073
1074
bool status = false;
1075
1076
cl_mem pixel_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_block_info, sizeof(cl_block_info_struct) * pContext->m_ocl_total_pixel_blocks);
1077
cl_mem cluster_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pCluster_info, sizeof(cl_endpoint_cluster_struct) * total_clusters);
1078
cl_mem sorted_block_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSorted_block_indices, sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1079
cl_mem output_buf = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1080
1081
if (!pixel_block_info || !cluster_info || !sorted_block_indices || !output_buf)
1082
goto exit;
1083
1084
if (!g_ocl.set_kernel_args(pContext->m_ocl_refine_endpoint_clusterization_kernel, ps, pContext->m_ocl_pixel_blocks, pixel_block_info, cluster_info, sorted_block_indices, output_buf))
1085
goto exit;
1086
1087
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_refine_endpoint_clusterization_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1088
goto exit;
1089
1090
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_buf, pOutput_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t)))
1091
goto exit;
1092
1093
debug_printf("opencl_refine_endpoint_clusterization: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1094
1095
status = true;
1096
1097
exit:
1098
g_ocl.destroy_buffer(pixel_block_info);
1099
g_ocl.destroy_buffer(cluster_info);
1100
g_ocl.destroy_buffer(sorted_block_indices);
1101
g_ocl.destroy_buffer(output_buf);
1102
1103
return status;
1104
}
1105
1106
bool opencl_find_optimal_selector_clusters_for_each_block(
1107
opencl_context_ptr pContext,
1108
const fosc_block_struct* pInput_block_info, // one per block
1109
uint32_t total_input_selectors,
1110
const fosc_selector_struct* pInput_selectors,
1111
const uint32_t* pSelector_cluster_indices,
1112
uint32_t* pOutput_selector_cluster_indices, // one per block
1113
bool perceptual)
1114
{
1115
if (!opencl_is_available())
1116
return false;
1117
1118
interval_timer tm;
1119
tm.start();
1120
1121
assert(pContext->m_ocl_pixel_blocks);
1122
if (!pContext->m_ocl_pixel_blocks)
1123
return false;
1124
1125
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1126
1127
fosc_param_struct ps;
1128
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1129
ps.m_perceptual = perceptual;
1130
1131
bool status = false;
1132
1133
cl_mem input_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_block_info, sizeof(fosc_block_struct) * pContext->m_ocl_total_pixel_blocks);
1134
cl_mem input_selectors = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_selectors, sizeof(fosc_selector_struct) * total_input_selectors);
1135
cl_mem selector_cluster_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSelector_cluster_indices, sizeof(uint32_t) * total_input_selectors);
1136
cl_mem output_selector_cluster_indices = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1137
1138
if (!input_block_info || !input_selectors || !selector_cluster_indices || !output_selector_cluster_indices)
1139
goto exit;
1140
1141
if (!g_ocl.set_kernel_args(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, ps, pContext->m_ocl_pixel_blocks, input_block_info, input_selectors, selector_cluster_indices, output_selector_cluster_indices))
1142
goto exit;
1143
1144
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1145
goto exit;
1146
1147
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_selector_cluster_indices, pOutput_selector_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t)))
1148
goto exit;
1149
1150
debug_printf("opencl_find_optimal_selector_clusters_for_each_block: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1151
1152
status = true;
1153
1154
exit:
1155
g_ocl.destroy_buffer(input_block_info);
1156
g_ocl.destroy_buffer(input_selectors);
1157
g_ocl.destroy_buffer(selector_cluster_indices);
1158
g_ocl.destroy_buffer(output_selector_cluster_indices);
1159
1160
return status;
1161
}
1162
1163
bool opencl_determine_selectors(
1164
opencl_context_ptr pContext,
1165
const color_rgba* pInput_etc_color5_and_inten,
1166
etc_block* pOutput_blocks,
1167
bool perceptual)
1168
{
1169
if (!opencl_is_available())
1170
return false;
1171
1172
interval_timer tm;
1173
tm.start();
1174
1175
assert(pContext->m_ocl_pixel_blocks);
1176
if (!pContext->m_ocl_pixel_blocks)
1177
return false;
1178
1179
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1180
1181
ds_param_struct ps;
1182
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1183
ps.m_perceptual = perceptual;
1184
1185
bool status = false;
1186
1187
cl_mem input_etc_color5_intens = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_etc_color5_and_inten, sizeof(color_rgba) * pContext->m_ocl_total_pixel_blocks);
1188
cl_mem output_blocks = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks);
1189
1190
if (!input_etc_color5_intens || !output_blocks)
1191
goto exit;
1192
1193
if (!g_ocl.set_kernel_args(pContext->m_ocl_determine_selectors_kernel, ps, pContext->m_ocl_pixel_blocks, input_etc_color5_intens, output_blocks))
1194
goto exit;
1195
1196
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_determine_selectors_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1197
goto exit;
1198
1199
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_blocks, pOutput_blocks, pContext->m_ocl_total_pixel_blocks * sizeof(etc_block)))
1200
goto exit;
1201
1202
debug_printf("opencl_determine_selectors: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1203
1204
status = true;
1205
1206
exit:
1207
g_ocl.destroy_buffer(input_etc_color5_intens);
1208
g_ocl.destroy_buffer(output_blocks);
1209
1210
return status;
1211
}
1212
1213
#else
1214
namespace basisu
1215
{
1216
// No OpenCL support - all dummy functions that return false;
1217
bool opencl_init(bool force_serialization)
1218
{
1219
BASISU_NOTE_UNUSED(force_serialization);
1220
1221
return false;
1222
}
1223
1224
void opencl_deinit()
1225
{
1226
}
1227
1228
bool opencl_is_available()
1229
{
1230
return false;
1231
}
1232
1233
opencl_context_ptr opencl_create_context()
1234
{
1235
return nullptr;
1236
}
1237
1238
void opencl_destroy_context(opencl_context_ptr context)
1239
{
1240
BASISU_NOTE_UNUSED(context);
1241
}
1242
1243
bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
1244
{
1245
BASISU_NOTE_UNUSED(pContext);
1246
BASISU_NOTE_UNUSED(total_blocks);
1247
BASISU_NOTE_UNUSED(pPixel_blocks);
1248
1249
return false;
1250
}
1251
1252
bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms)
1253
{
1254
BASISU_NOTE_UNUSED(pContext);
1255
BASISU_NOTE_UNUSED(pOutput_blocks);
1256
BASISU_NOTE_UNUSED(perceptual);
1257
BASISU_NOTE_UNUSED(total_perms);
1258
1259
return false;
1260
}
1261
1262
bool opencl_encode_etc1s_pixel_clusters(
1263
opencl_context_ptr pContext,
1264
etc_block* pOutput_blocks,
1265
uint32_t total_clusters,
1266
const cl_pixel_cluster* pClusters,
1267
uint64_t total_pixels,
1268
const color_rgba* pPixels, const uint32_t *pPixel_weights,
1269
bool perceptual, uint32_t total_perms)
1270
{
1271
BASISU_NOTE_UNUSED(pContext);
1272
BASISU_NOTE_UNUSED(pOutput_blocks);
1273
BASISU_NOTE_UNUSED(total_clusters);
1274
BASISU_NOTE_UNUSED(pClusters);
1275
BASISU_NOTE_UNUSED(total_pixels);
1276
BASISU_NOTE_UNUSED(pPixels);
1277
BASISU_NOTE_UNUSED(pPixel_weights);
1278
BASISU_NOTE_UNUSED(perceptual);
1279
BASISU_NOTE_UNUSED(total_perms);
1280
1281
return false;
1282
}
1283
1284
bool opencl_refine_endpoint_clusterization(
1285
opencl_context_ptr pContext,
1286
const cl_block_info_struct* pPixel_block_info,
1287
uint32_t total_clusters,
1288
const cl_endpoint_cluster_struct* pCluster_info,
1289
const uint32_t* pSorted_block_indices,
1290
uint32_t* pOutput_cluster_indices,
1291
bool perceptual)
1292
{
1293
BASISU_NOTE_UNUSED(pContext);
1294
BASISU_NOTE_UNUSED(pPixel_block_info);
1295
BASISU_NOTE_UNUSED(total_clusters);
1296
BASISU_NOTE_UNUSED(pCluster_info);
1297
BASISU_NOTE_UNUSED(pSorted_block_indices);
1298
BASISU_NOTE_UNUSED(pOutput_cluster_indices);
1299
BASISU_NOTE_UNUSED(perceptual);
1300
1301
return false;
1302
}
1303
1304
bool opencl_find_optimal_selector_clusters_for_each_block(
1305
opencl_context_ptr pContext,
1306
const fosc_block_struct* pInput_block_info, // one per block
1307
uint32_t total_input_selectors,
1308
const fosc_selector_struct* pInput_selectors,
1309
const uint32_t* pSelector_cluster_indices,
1310
uint32_t* pOutput_selector_cluster_indices, // one per block
1311
bool perceptual)
1312
{
1313
BASISU_NOTE_UNUSED(pContext);
1314
BASISU_NOTE_UNUSED(pInput_block_info);
1315
BASISU_NOTE_UNUSED(total_input_selectors);
1316
BASISU_NOTE_UNUSED(pInput_selectors);
1317
BASISU_NOTE_UNUSED(pSelector_cluster_indices);
1318
BASISU_NOTE_UNUSED(pOutput_selector_cluster_indices);
1319
BASISU_NOTE_UNUSED(perceptual);
1320
1321
return false;
1322
}
1323
1324
bool opencl_determine_selectors(
1325
opencl_context_ptr pContext,
1326
const color_rgba* pInput_etc_color5_and_inten,
1327
etc_block* pOutput_blocks,
1328
bool perceptual)
1329
{
1330
BASISU_NOTE_UNUSED(pContext);
1331
BASISU_NOTE_UNUSED(pInput_etc_color5_and_inten);
1332
BASISU_NOTE_UNUSED(pOutput_blocks);
1333
BASISU_NOTE_UNUSED(perceptual);
1334
1335
return false;
1336
}
1337
1338
#endif // BASISU_SUPPORT_OPENCL
1339
1340
} // namespace basisu
1341
1342