Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_opencl.cpp
20897 views
1
// basisu_opencl.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_opencl.h"
16
17
// If 1, the kernel source code will come from encoders/ocl_kernels.h. Otherwise, it will be read from the "ocl_kernels.cl" file in the current directory (for development).
18
#define BASISU_USE_OCL_KERNELS_HEADER (1)
19
#define BASISU_OCL_KERNELS_FILENAME "ocl_kernels.cl"
20
21
#if BASISU_SUPPORT_OPENCL
22
23
#include "basisu_enc.h"
24
25
// We only use OpenCL v1.2 or less.
26
#define CL_TARGET_OPENCL_VERSION 120
27
28
#ifdef __APPLE__
29
#include <OpenCL/opencl.h>
30
#else
31
#include <CL/cl.h>
32
#endif
33
34
#ifndef BASISU_OPENCL_ASSERT_ON_ANY_ERRORS
35
#define BASISU_OPENCL_ASSERT_ON_ANY_ERRORS (0)
36
#endif
37
38
namespace basisu
39
{
40
#if BASISU_USE_OCL_KERNELS_HEADER
41
#include "basisu_ocl_kernels.h"
42
#endif
43
44
static void ocl_error_printf(const char* pFmt, ...)
45
{
46
va_list args;
47
va_start(args, pFmt);
48
error_vprintf(pFmt, args);
49
va_end(args);
50
51
#if BASISU_OPENCL_ASSERT_ON_ANY_ERRORS
52
assert(0);
53
#endif
54
}
55
56
class ocl
57
{
58
public:
59
ocl()
60
{
61
memset(&m_dev_fp_config, 0, sizeof(m_dev_fp_config));
62
63
m_ocl_mutex.lock();
64
m_ocl_mutex.unlock();
65
}
66
67
~ocl()
68
{
69
}
70
71
bool is_initialized() const { return m_device_id != nullptr; }
72
73
cl_device_id get_device_id() const { return m_device_id; }
74
cl_context get_context() const { return m_context; }
75
cl_command_queue get_command_queue() { return m_command_queue; }
76
cl_program get_program() const { return m_program; }
77
78
bool init(bool force_serialization)
79
{
80
deinit();
81
82
interval_timer tm;
83
tm.start();
84
85
cl_uint num_platforms = 0;
86
cl_int ret = clGetPlatformIDs(0, NULL, &num_platforms);
87
if (ret != CL_SUCCESS)
88
{
89
ocl_error_printf("ocl::init: clGetPlatformIDs() failed with %i\n", ret);
90
return false;
91
}
92
93
if ((!num_platforms) || (num_platforms > INT_MAX))
94
{
95
ocl_error_printf("ocl::init: clGetPlatformIDs() returned an invalid number of num_platforms\n");
96
return false;
97
}
98
99
std::vector<cl_platform_id> platforms(num_platforms);
100
101
ret = clGetPlatformIDs(num_platforms, platforms.data(), NULL);
102
if (ret != CL_SUCCESS)
103
{
104
ocl_error_printf("ocl::init: clGetPlatformIDs() failed\n");
105
return false;
106
}
107
108
cl_uint num_devices = 0;
109
ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &m_device_id, &num_devices);
110
111
if (ret == CL_DEVICE_NOT_FOUND)
112
{
113
ocl_error_printf("ocl::init: Couldn't get any GPU device ID's, trying CL_DEVICE_TYPE_CPU\n");
114
115
ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &m_device_id, &num_devices);
116
}
117
118
if (ret != CL_SUCCESS)
119
{
120
ocl_error_printf("ocl::init: Unable to get any device ID's\n");
121
122
m_device_id = nullptr;
123
return false;
124
}
125
126
ret = clGetDeviceInfo(m_device_id,
127
CL_DEVICE_SINGLE_FP_CONFIG,
128
sizeof(m_dev_fp_config),
129
&m_dev_fp_config,
130
nullptr);
131
if (ret != CL_SUCCESS)
132
{
133
ocl_error_printf("ocl::init: clGetDeviceInfo() failed\n");
134
return false;
135
}
136
137
char plat_vers[256];
138
size_t rv = 0;
139
ret = clGetPlatformInfo(platforms[0], CL_PLATFORM_VERSION, sizeof(plat_vers), plat_vers, &rv);
140
if (ret == CL_SUCCESS)
141
printf("OpenCL platform version: \"%s\"\n", plat_vers);
142
143
// Serialize CL calls with the AMD driver to avoid lockups when multiple command queues per thread are used. This sucks, but what can we do?
144
m_use_mutex = (strstr(plat_vers, "AMD") != nullptr) || force_serialization;
145
146
printf("Serializing OpenCL calls across threads: %u\n", (uint32_t)m_use_mutex);
147
148
m_context = clCreateContext(nullptr, 1, &m_device_id, nullptr, nullptr, &ret);
149
if (ret != CL_SUCCESS)
150
{
151
ocl_error_printf("ocl::init: clCreateContext() failed\n");
152
153
m_device_id = nullptr;
154
m_context = nullptr;
155
return false;
156
}
157
158
m_command_queue = clCreateCommandQueue(m_context, m_device_id, 0, &ret);
159
if (ret != CL_SUCCESS)
160
{
161
ocl_error_printf("ocl::init: clCreateCommandQueue() failed\n");
162
163
deinit();
164
return false;
165
}
166
167
printf("OpenCL init time: %3.3f secs\n", tm.get_elapsed_secs());
168
169
return true;
170
}
171
172
bool deinit()
173
{
174
if (m_program)
175
{
176
clReleaseProgram(m_program);
177
m_program = nullptr;
178
}
179
180
if (m_command_queue)
181
{
182
clReleaseCommandQueue(m_command_queue);
183
m_command_queue = nullptr;
184
}
185
186
if (m_context)
187
{
188
clReleaseContext(m_context);
189
m_context = nullptr;
190
}
191
192
m_device_id = nullptr;
193
194
return true;
195
}
196
197
cl_command_queue create_command_queue()
198
{
199
cl_serializer serializer(this);
200
201
cl_int ret = 0;
202
cl_command_queue p = clCreateCommandQueue(m_context, m_device_id, 0, &ret);
203
if (ret != CL_SUCCESS)
204
return nullptr;
205
206
return p;
207
}
208
209
void destroy_command_queue(cl_command_queue p)
210
{
211
if (p)
212
{
213
cl_serializer serializer(this);
214
215
clReleaseCommandQueue(p);
216
}
217
}
218
219
bool init_program(const char* pSrc, size_t src_size)
220
{
221
cl_int ret;
222
223
if (m_program != nullptr)
224
{
225
clReleaseProgram(m_program);
226
m_program = nullptr;
227
}
228
229
m_program = clCreateProgramWithSource(m_context, 1, (const char**)&pSrc, (const size_t*)&src_size, &ret);
230
if (ret != CL_SUCCESS)
231
{
232
ocl_error_printf("ocl::init_program: clCreateProgramWithSource() failed!\n");
233
return false;
234
}
235
236
std::string options;
237
if (m_dev_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
238
{
239
options += "-cl-fp32-correctly-rounded-divide-sqrt";
240
}
241
242
options += " -cl-std=CL1.2";
243
//options += " -cl-opt-disable";
244
//options += " -cl-mad-enable";
245
//options += " -cl-fast-relaxed-math";
246
247
ret = clBuildProgram(m_program, 1, &m_device_id,
248
options.size() ? options.c_str() : nullptr, // options
249
nullptr, // notify
250
nullptr); // user_data
251
252
if (ret != CL_SUCCESS)
253
{
254
const cl_int build_program_result = ret;
255
256
size_t ret_val_size;
257
ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
258
if (ret != CL_SUCCESS)
259
{
260
ocl_error_printf("ocl::init_program: clGetProgramBuildInfo() failed!\n");
261
return false;
262
}
263
264
std::vector<char> build_log(ret_val_size + 1);
265
266
ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log.data(), NULL);
267
268
ocl_error_printf("\nclBuildProgram() failed with error %i:\n%s", build_program_result, build_log.data());
269
270
return false;
271
}
272
273
return true;
274
}
275
276
cl_kernel create_kernel(const char* pName)
277
{
278
if (!m_program)
279
return nullptr;
280
281
cl_serializer serializer(this);
282
283
cl_int ret;
284
cl_kernel kernel = clCreateKernel(m_program, pName, &ret);
285
if (ret != CL_SUCCESS)
286
{
287
ocl_error_printf("ocl::create_kernel: clCreateKernel() failed!\n");
288
return nullptr;
289
}
290
291
return kernel;
292
}
293
294
bool destroy_kernel(cl_kernel k)
295
{
296
if (k)
297
{
298
cl_serializer serializer(this);
299
300
cl_int ret = clReleaseKernel(k);
301
if (ret != CL_SUCCESS)
302
{
303
ocl_error_printf("ocl::destroy_kernel: clReleaseKernel() failed!\n");
304
return false;
305
}
306
}
307
return true;
308
}
309
310
cl_mem alloc_read_buffer(size_t size)
311
{
312
cl_serializer serializer(this);
313
314
cl_int ret;
315
cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret);
316
if (ret != CL_SUCCESS)
317
{
318
ocl_error_printf("ocl::alloc_read_buffer: clCreateBuffer() failed!\n");
319
return nullptr;
320
}
321
322
return obj;
323
}
324
325
cl_mem alloc_and_init_read_buffer(cl_command_queue command_queue, const void *pInit, size_t size)
326
{
327
cl_serializer serializer(this);
328
329
cl_int ret;
330
cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret);
331
if (ret != CL_SUCCESS)
332
{
333
ocl_error_printf("ocl::alloc_and_init_read_buffer: clCreateBuffer() failed!\n");
334
return nullptr;
335
}
336
337
#if 0
338
if (!write_to_buffer(command_queue, obj, pInit, size))
339
{
340
destroy_buffer(obj);
341
return nullptr;
342
}
343
#else
344
ret = clEnqueueWriteBuffer(command_queue, obj, CL_TRUE, 0, size, pInit, 0, NULL, NULL);
345
if (ret != CL_SUCCESS)
346
{
347
ocl_error_printf("ocl::alloc_and_init_read_buffer: clEnqueueWriteBuffer() failed!\n");
348
return nullptr;
349
}
350
#endif
351
352
return obj;
353
}
354
355
cl_mem alloc_write_buffer(size_t size)
356
{
357
cl_serializer serializer(this);
358
359
cl_int ret;
360
cl_mem obj = clCreateBuffer(m_context, CL_MEM_WRITE_ONLY, size, NULL, &ret);
361
if (ret != CL_SUCCESS)
362
{
363
ocl_error_printf("ocl::alloc_write_buffer: clCreateBuffer() failed!\n");
364
return nullptr;
365
}
366
367
return obj;
368
}
369
370
bool destroy_buffer(cl_mem buf)
371
{
372
if (buf)
373
{
374
cl_serializer serializer(this);
375
376
cl_int ret = clReleaseMemObject(buf);
377
if (ret != CL_SUCCESS)
378
{
379
ocl_error_printf("ocl::destroy_buffer: clReleaseMemObject() failed!\n");
380
return false;
381
}
382
}
383
384
return true;
385
}
386
387
bool write_to_buffer(cl_command_queue command_queue, cl_mem clmem, const void* d, const size_t m)
388
{
389
cl_serializer serializer(this);
390
391
cl_int ret = clEnqueueWriteBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL);
392
if (ret != CL_SUCCESS)
393
{
394
ocl_error_printf("ocl::write_to_buffer: clEnqueueWriteBuffer() failed!\n");
395
return false;
396
}
397
398
return true;
399
}
400
401
bool read_from_buffer(cl_command_queue command_queue, const cl_mem clmem, void* d, size_t m)
402
{
403
cl_serializer serializer(this);
404
405
cl_int ret = clEnqueueReadBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL);
406
if (ret != CL_SUCCESS)
407
{
408
ocl_error_printf("ocl::read_from_buffer: clEnqueueReadBuffer() failed!\n");
409
return false;
410
}
411
412
return true;
413
}
414
415
cl_mem create_read_image_u8(uint32_t width, uint32_t height, const void* pPixels, uint32_t bytes_per_pixel, bool normalized)
416
{
417
cl_image_format fmt = get_image_format(bytes_per_pixel, normalized);
418
419
cl_image_desc desc;
420
memset(&desc, 0, sizeof(desc));
421
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
422
desc.image_width = width;
423
desc.image_height = height;
424
desc.image_row_pitch = width * bytes_per_pixel;
425
426
cl_serializer serializer(this);
427
428
cl_int ret;
429
cl_mem img = clCreateImage(m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &fmt, &desc, (void*)pPixels, &ret);
430
if (ret != CL_SUCCESS)
431
{
432
ocl_error_printf("ocl::create_read_image_u8: clCreateImage() failed!\n");
433
return nullptr;
434
}
435
436
return img;
437
}
438
439
cl_mem create_write_image_u8(uint32_t width, uint32_t height, uint32_t bytes_per_pixel, bool normalized)
440
{
441
cl_image_format fmt = get_image_format(bytes_per_pixel, normalized);
442
443
cl_image_desc desc;
444
memset(&desc, 0, sizeof(desc));
445
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
446
desc.image_width = width;
447
desc.image_height = height;
448
449
cl_serializer serializer(this);
450
451
cl_int ret;
452
cl_mem img = clCreateImage(m_context, CL_MEM_WRITE_ONLY, &fmt, &desc, nullptr, &ret);
453
if (ret != CL_SUCCESS)
454
{
455
ocl_error_printf("ocl::create_write_image_u8: clCreateImage() failed!\n");
456
return nullptr;
457
}
458
459
return img;
460
}
461
462
bool read_from_image(cl_command_queue command_queue, cl_mem img, void* pPixels, uint32_t ofs_x, uint32_t ofs_y, uint32_t width, uint32_t height)
463
{
464
cl_serializer serializer(this);
465
466
size_t origin[3] = { ofs_x, ofs_y, 0 }, region[3] = { width, height, 1 };
467
468
cl_int err = clEnqueueReadImage(command_queue, img, CL_TRUE, origin, region, 0, 0, pPixels, 0, NULL, NULL);
469
if (err != CL_SUCCESS)
470
{
471
ocl_error_printf("ocl::read_from_image: clEnqueueReadImage() failed!\n");
472
return false;
473
}
474
475
return true;
476
}
477
478
bool run_1D(cl_command_queue command_queue, const cl_kernel kernel, size_t num_items)
479
{
480
cl_serializer serializer(this);
481
482
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
483
1, // work_dim
484
nullptr, // global_work_offset
485
&num_items, // global_work_size
486
nullptr, // local_work_size
487
0, // num_events_in_wait_list
488
nullptr, // event_wait_list
489
nullptr // event
490
);
491
492
if (ret != CL_SUCCESS)
493
{
494
ocl_error_printf("ocl::run_1D: clEnqueueNDRangeKernel() failed!\n");
495
return false;
496
}
497
498
return true;
499
}
500
501
bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t width, size_t height)
502
{
503
cl_serializer serializer(this);
504
505
size_t num_global_items[2] = { width, height };
506
//size_t num_local_items[2] = { 1, 1 };
507
508
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
509
2, // work_dim
510
nullptr, // global_work_offset
511
num_global_items, // global_work_size
512
nullptr, // local_work_size
513
0, // num_events_in_wait_list
514
nullptr, // event_wait_list
515
nullptr // event
516
);
517
518
if (ret != CL_SUCCESS)
519
{
520
ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n");
521
return false;
522
}
523
524
return true;
525
}
526
527
bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t ofs_x, size_t ofs_y, size_t width, size_t height)
528
{
529
cl_serializer serializer(this);
530
531
size_t global_ofs[2] = { ofs_x, ofs_y };
532
size_t num_global_items[2] = { width, height };
533
//size_t num_local_items[2] = { 1, 1 };
534
535
cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel,
536
2, // work_dim
537
global_ofs, // global_work_offset
538
num_global_items, // global_work_size
539
nullptr, // local_work_size
540
0, // num_events_in_wait_list
541
nullptr, // event_wait_list
542
nullptr // event
543
);
544
545
if (ret != CL_SUCCESS)
546
{
547
ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n");
548
return false;
549
}
550
551
return true;
552
}
553
554
void flush(cl_command_queue command_queue)
555
{
556
cl_serializer serializer(this);
557
558
clFlush(command_queue);
559
clFinish(command_queue);
560
}
561
562
template<typename T>
563
bool set_kernel_arg(cl_kernel kernel, uint32_t index, const T& obj)
564
{
565
cl_serializer serializer(this);
566
567
cl_int ret = clSetKernelArg(kernel, index, sizeof(T), (void*)&obj);
568
if (ret != CL_SUCCESS)
569
{
570
ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n");
571
return false;
572
}
573
return true;
574
}
575
576
template<typename T>
577
bool set_kernel_args(cl_kernel kernel, const T& obj1)
578
{
579
cl_serializer serializer(this);
580
581
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1);
582
if (ret != CL_SUCCESS)
583
{
584
ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n");
585
return false;
586
}
587
return true;
588
}
589
590
#define BASISU_CHECK_ERR if (ret != CL_SUCCESS) { ocl_error_printf("ocl::set_kernel_args: clSetKernelArg() failed!\n"); return false; }
591
592
template<typename T, typename U>
593
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2)
594
{
595
cl_serializer serializer(this);
596
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
597
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
598
return true;
599
}
600
601
template<typename T, typename U, typename V>
602
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3)
603
{
604
cl_serializer serializer(this);
605
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
606
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
607
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
608
return true;
609
}
610
611
template<typename T, typename U, typename V, typename W>
612
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4)
613
{
614
cl_serializer serializer(this);
615
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
616
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
617
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
618
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
619
return true;
620
}
621
622
template<typename T, typename U, typename V, typename W, typename X>
623
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5)
624
{
625
cl_serializer serializer(this);
626
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
627
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
628
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
629
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
630
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
631
return true;
632
}
633
634
template<typename T, typename U, typename V, typename W, typename X, typename Y>
635
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6)
636
{
637
cl_serializer serializer(this);
638
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
639
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
640
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
641
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
642
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
643
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
644
return true;
645
}
646
647
template<typename T, typename U, typename V, typename W, typename X, typename Y, typename Z>
648
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7)
649
{
650
cl_serializer serializer(this);
651
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
652
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
653
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
654
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
655
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
656
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
657
ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR
658
return true;
659
}
660
661
template<typename T, typename U, typename V, typename W, typename X, typename Y, typename Z, typename A>
662
bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7, const A& obj8)
663
{
664
cl_serializer serializer(this);
665
cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR
666
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR
667
ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR
668
ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR
669
ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR
670
ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR
671
ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR
672
ret = clSetKernelArg(kernel, 7, sizeof(A), (void*)&obj8); BASISU_CHECK_ERR
673
return true;
674
}
675
#undef BASISU_CHECK_ERR
676
677
private:
678
cl_device_id m_device_id = nullptr;
679
cl_context m_context = nullptr;
680
cl_command_queue m_command_queue = nullptr;
681
cl_program m_program = nullptr;
682
cl_device_fp_config m_dev_fp_config;
683
684
bool m_use_mutex = false;
685
std::mutex m_ocl_mutex;
686
687
// This helper object is used to optionally serialize all calls to the CL driver after initialization.
688
// Currently this is only used to work around race conditions in the Windows AMD driver.
689
struct cl_serializer
690
{
691
inline cl_serializer(const cl_serializer&);
692
cl_serializer& operator= (const cl_serializer&);
693
694
inline cl_serializer(ocl *p) : m_p(p)
695
{
696
if (m_p->m_use_mutex)
697
m_p->m_ocl_mutex.lock();
698
}
699
700
inline ~cl_serializer()
701
{
702
if (m_p->m_use_mutex)
703
m_p->m_ocl_mutex.unlock();
704
}
705
706
private:
707
ocl* m_p;
708
};
709
710
cl_image_format get_image_format(uint32_t bytes_per_pixel, bool normalized)
711
{
712
cl_image_format fmt;
713
switch (bytes_per_pixel)
714
{
715
case 1: fmt.image_channel_order = CL_LUMINANCE; break;
716
case 2: fmt.image_channel_order = CL_RG; break;
717
case 3: fmt.image_channel_order = CL_RGB; break;
718
case 4: fmt.image_channel_order = CL_RGBA; break;
719
default: assert(0); fmt.image_channel_order = CL_LUMINANCE; break;
720
}
721
722
fmt.image_channel_data_type = normalized ? CL_UNORM_INT8 : CL_UNSIGNED_INT8;
723
return fmt;
724
}
725
};
726
727
// Library blobal state
728
ocl g_ocl;
729
730
bool opencl_init(bool force_serialization)
731
{
732
if (g_ocl.is_initialized())
733
{
734
assert(0);
735
return false;
736
}
737
738
if (!g_ocl.init(force_serialization))
739
{
740
ocl_error_printf("opencl_init: Failed initializing OpenCL\n");
741
return false;
742
}
743
744
const char* pKernel_src = nullptr;
745
size_t kernel_src_size = 0;
746
uint8_vec kernel_src;
747
748
#if BASISU_USE_OCL_KERNELS_HEADER
749
pKernel_src = reinterpret_cast<const char*>(ocl_kernels_cl);
750
kernel_src_size = ocl_kernels_cl_len;
751
#else
752
if (!read_file_to_vec(BASISU_OCL_KERNELS_FILENAME, kernel_src))
753
{
754
ocl_error_printf("opencl_init: Cannot read OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME);
755
g_ocl.deinit();
756
return false;
757
}
758
759
pKernel_src = (char*)kernel_src.data();
760
kernel_src_size = kernel_src.size();
761
#endif
762
763
if (!kernel_src_size)
764
{
765
ocl_error_printf("opencl_init: Invalid OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME);
766
g_ocl.deinit();
767
return false;
768
}
769
770
if (!g_ocl.init_program(pKernel_src, kernel_src_size))
771
{
772
ocl_error_printf("opencl_init: Failed compiling OpenCL program\n");
773
g_ocl.deinit();
774
return false;
775
}
776
777
printf("OpenCL support initialized successfully\n");
778
779
return true;
780
}
781
782
void opencl_deinit()
783
{
784
g_ocl.deinit();
785
}
786
787
bool opencl_is_available()
788
{
789
return g_ocl.is_initialized();
790
}
791
792
struct opencl_context
793
{
794
size_t m_ocl_total_pixel_blocks;
795
cl_mem m_ocl_pixel_blocks;
796
797
cl_command_queue m_command_queue;
798
799
cl_kernel m_ocl_encode_etc1s_blocks_kernel;
800
cl_kernel m_ocl_refine_endpoint_clusterization_kernel;
801
cl_kernel m_ocl_encode_etc1s_from_pixel_cluster_kernel;
802
cl_kernel m_ocl_find_optimal_selector_clusters_for_each_block_kernel;
803
cl_kernel m_ocl_determine_selectors_kernel;
804
};
805
806
opencl_context_ptr opencl_create_context()
807
{
808
if (!opencl_is_available())
809
{
810
ocl_error_printf("opencl_create_context: OpenCL not initialized\n");
811
assert(0);
812
return nullptr;
813
}
814
815
interval_timer tm;
816
tm.start();
817
818
opencl_context* pContext = static_cast<opencl_context * >(calloc(sizeof(opencl_context), 1));
819
if (!pContext)
820
return nullptr;
821
822
// To avoid driver bugs in some drivers - serialize this. Likely not necessary, we don't know.
823
// https://community.intel.com/t5/OpenCL-for-CPU/Bug-report-clCreateKernelsInProgram-is-not-thread-safe/td-p/1159771
824
825
pContext->m_command_queue = g_ocl.create_command_queue();
826
if (!pContext->m_command_queue)
827
{
828
ocl_error_printf("opencl_create_context: Failed creating OpenCL command queue!\n");
829
opencl_destroy_context(pContext);
830
return nullptr;
831
}
832
833
pContext->m_ocl_encode_etc1s_blocks_kernel = g_ocl.create_kernel("encode_etc1s_blocks");
834
if (!pContext->m_ocl_encode_etc1s_blocks_kernel)
835
{
836
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_block\n");
837
opencl_destroy_context(pContext);
838
return nullptr;
839
}
840
841
pContext->m_ocl_refine_endpoint_clusterization_kernel = g_ocl.create_kernel("refine_endpoint_clusterization");
842
if (!pContext->m_ocl_refine_endpoint_clusterization_kernel)
843
{
844
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel refine_endpoint_clusterization\n");
845
opencl_destroy_context(pContext);
846
return nullptr;
847
}
848
849
pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel = g_ocl.create_kernel("encode_etc1s_from_pixel_cluster");
850
if (!pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel)
851
{
852
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_from_pixel_cluster\n");
853
opencl_destroy_context(pContext);
854
return nullptr;
855
}
856
857
pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel = g_ocl.create_kernel("find_optimal_selector_clusters_for_each_block");
858
if (!pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel)
859
{
860
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel find_optimal_selector_clusters_for_each_block\n");
861
opencl_destroy_context(pContext);
862
return nullptr;
863
}
864
865
pContext->m_ocl_determine_selectors_kernel = g_ocl.create_kernel("determine_selectors");
866
if (!pContext->m_ocl_determine_selectors_kernel)
867
{
868
ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel determine_selectors\n");
869
opencl_destroy_context(pContext);
870
return nullptr;
871
}
872
873
debug_printf("opencl_create_context: Elapsed time: %f secs\n", tm.get_elapsed_secs());
874
875
return pContext;
876
}
877
878
void opencl_destroy_context(opencl_context_ptr pContext)
879
{
880
if (!pContext)
881
return;
882
883
interval_timer tm;
884
tm.start();
885
886
g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks);
887
888
g_ocl.destroy_kernel(pContext->m_ocl_determine_selectors_kernel);
889
g_ocl.destroy_kernel(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel);
890
g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel);
891
g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_blocks_kernel);
892
g_ocl.destroy_kernel(pContext->m_ocl_refine_endpoint_clusterization_kernel);
893
894
g_ocl.destroy_command_queue(pContext->m_command_queue);
895
896
memset(pContext, 0, sizeof(opencl_context));
897
898
free(pContext);
899
900
debug_printf("opencl_destroy_context: Elapsed time: %f secs\n", tm.get_elapsed_secs());
901
}
902
903
#pragma pack(push, 1)
904
struct cl_encode_etc1s_param_struct
905
{
906
int m_total_blocks;
907
int m_perceptual;
908
int m_total_perms;
909
};
910
#pragma pack(pop)
911
912
bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
913
{
914
if (!opencl_is_available())
915
return false;
916
917
if (pContext->m_ocl_pixel_blocks)
918
{
919
g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks);
920
pContext->m_ocl_pixel_blocks = nullptr;
921
}
922
923
pContext->m_ocl_pixel_blocks = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_blocks, sizeof(cl_pixel_block) * total_blocks);
924
if (!pContext->m_ocl_pixel_blocks)
925
return false;
926
927
pContext->m_ocl_total_pixel_blocks = total_blocks;
928
929
return true;
930
}
931
932
bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms)
933
{
934
if (!opencl_is_available())
935
return false;
936
937
interval_timer tm;
938
tm.start();
939
940
assert(pContext->m_ocl_pixel_blocks);
941
if (!pContext->m_ocl_pixel_blocks)
942
return false;
943
944
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
945
946
cl_encode_etc1s_param_struct ps;
947
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
948
ps.m_perceptual = perceptual;
949
ps.m_total_perms = total_perms;
950
951
bool status = false;
952
953
cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps));
954
cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks);
955
956
if (!vars || !block_buf)
957
goto exit;
958
959
if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_blocks_kernel, vars, pContext->m_ocl_pixel_blocks, block_buf))
960
goto exit;
961
962
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_blocks_kernel, pContext->m_ocl_total_pixel_blocks, 1))
963
goto exit;
964
965
if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, pContext->m_ocl_total_pixel_blocks * sizeof(etc_block)))
966
goto exit;
967
968
status = true;
969
970
debug_printf("opencl_encode_etc1s_blocks: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
971
972
exit:
973
g_ocl.destroy_buffer(block_buf);
974
g_ocl.destroy_buffer(vars);
975
976
return status;
977
}
978
979
bool opencl_encode_etc1s_pixel_clusters(
980
opencl_context_ptr pContext,
981
etc_block* pOutput_blocks,
982
uint32_t total_clusters,
983
const cl_pixel_cluster* pClusters,
984
uint64_t total_pixels,
985
const color_rgba* pPixels, const uint32_t* pPixel_weights,
986
bool perceptual, uint32_t total_perms)
987
{
988
if (!opencl_is_available())
989
return false;
990
991
interval_timer tm;
992
tm.start();
993
994
cl_encode_etc1s_param_struct ps;
995
ps.m_total_blocks = total_clusters;
996
ps.m_perceptual = perceptual;
997
ps.m_total_perms = total_perms;
998
999
bool status = false;
1000
1001
if (sizeof(size_t) == sizeof(uint32_t))
1002
{
1003
if ( ((sizeof(cl_pixel_cluster) * total_clusters) > UINT32_MAX) ||
1004
((sizeof(color_rgba) * total_pixels) > UINT32_MAX) ||
1005
((sizeof(uint32_t) * total_pixels) > UINT32_MAX) )
1006
{
1007
return false;
1008
}
1009
}
1010
1011
cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps));
1012
cl_mem input_clusters = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pClusters, (size_t)(sizeof(cl_pixel_cluster) * total_clusters));
1013
cl_mem input_pixels = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixels, (size_t)(sizeof(color_rgba) * total_pixels));
1014
cl_mem weights_buf = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_weights, (size_t)(sizeof(uint32_t) * total_pixels));
1015
cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * total_clusters);
1016
1017
if (!vars || !input_clusters || !input_pixels || !weights_buf || !block_buf)
1018
goto exit;
1019
1020
if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, vars, input_clusters, input_pixels, weights_buf, block_buf))
1021
goto exit;
1022
1023
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, total_clusters, 1))
1024
goto exit;
1025
1026
if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, sizeof(etc_block) * total_clusters))
1027
goto exit;
1028
1029
status = true;
1030
1031
debug_printf("opencl_encode_etc1s_pixel_clusters: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1032
1033
exit:
1034
g_ocl.destroy_buffer(block_buf);
1035
g_ocl.destroy_buffer(weights_buf);
1036
g_ocl.destroy_buffer(input_pixels);
1037
g_ocl.destroy_buffer(input_clusters);
1038
g_ocl.destroy_buffer(vars);
1039
1040
return status;
1041
}
1042
1043
#pragma pack(push, 1)
1044
struct cl_rec_param_struct
1045
{
1046
int m_total_blocks;
1047
int m_perceptual;
1048
};
1049
#pragma pack(pop)
1050
1051
bool opencl_refine_endpoint_clusterization(
1052
opencl_context_ptr pContext,
1053
const cl_block_info_struct* pPixel_block_info,
1054
uint32_t total_clusters,
1055
const cl_endpoint_cluster_struct* pCluster_info,
1056
const uint32_t* pSorted_block_indices,
1057
uint32_t* pOutput_cluster_indices,
1058
bool perceptual)
1059
{
1060
if (!opencl_is_available())
1061
return false;
1062
1063
interval_timer tm;
1064
tm.start();
1065
1066
assert(pContext->m_ocl_pixel_blocks);
1067
if (!pContext->m_ocl_pixel_blocks)
1068
return false;
1069
1070
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1071
1072
cl_rec_param_struct ps;
1073
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1074
ps.m_perceptual = perceptual;
1075
1076
bool status = false;
1077
1078
cl_mem pixel_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_block_info, sizeof(cl_block_info_struct) * pContext->m_ocl_total_pixel_blocks);
1079
cl_mem cluster_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pCluster_info, sizeof(cl_endpoint_cluster_struct) * total_clusters);
1080
cl_mem sorted_block_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSorted_block_indices, sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1081
cl_mem output_buf = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1082
1083
if (!pixel_block_info || !cluster_info || !sorted_block_indices || !output_buf)
1084
goto exit;
1085
1086
if (!g_ocl.set_kernel_args(pContext->m_ocl_refine_endpoint_clusterization_kernel, ps, pContext->m_ocl_pixel_blocks, pixel_block_info, cluster_info, sorted_block_indices, output_buf))
1087
goto exit;
1088
1089
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_refine_endpoint_clusterization_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1090
goto exit;
1091
1092
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_buf, pOutput_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t)))
1093
goto exit;
1094
1095
debug_printf("opencl_refine_endpoint_clusterization: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1096
1097
status = true;
1098
1099
exit:
1100
g_ocl.destroy_buffer(pixel_block_info);
1101
g_ocl.destroy_buffer(cluster_info);
1102
g_ocl.destroy_buffer(sorted_block_indices);
1103
g_ocl.destroy_buffer(output_buf);
1104
1105
return status;
1106
}
1107
1108
bool opencl_find_optimal_selector_clusters_for_each_block(
1109
opencl_context_ptr pContext,
1110
const fosc_block_struct* pInput_block_info, // one per block
1111
uint32_t total_input_selectors,
1112
const fosc_selector_struct* pInput_selectors,
1113
const uint32_t* pSelector_cluster_indices,
1114
uint32_t* pOutput_selector_cluster_indices, // one per block
1115
bool perceptual)
1116
{
1117
if (!opencl_is_available())
1118
return false;
1119
1120
interval_timer tm;
1121
tm.start();
1122
1123
assert(pContext->m_ocl_pixel_blocks);
1124
if (!pContext->m_ocl_pixel_blocks)
1125
return false;
1126
1127
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1128
1129
fosc_param_struct ps;
1130
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1131
ps.m_perceptual = perceptual;
1132
1133
bool status = false;
1134
1135
cl_mem input_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_block_info, sizeof(fosc_block_struct) * pContext->m_ocl_total_pixel_blocks);
1136
cl_mem input_selectors = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_selectors, sizeof(fosc_selector_struct) * total_input_selectors);
1137
cl_mem selector_cluster_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSelector_cluster_indices, sizeof(uint32_t) * total_input_selectors);
1138
cl_mem output_selector_cluster_indices = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks);
1139
1140
if (!input_block_info || !input_selectors || !selector_cluster_indices || !output_selector_cluster_indices)
1141
goto exit;
1142
1143
if (!g_ocl.set_kernel_args(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, ps, pContext->m_ocl_pixel_blocks, input_block_info, input_selectors, selector_cluster_indices, output_selector_cluster_indices))
1144
goto exit;
1145
1146
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1147
goto exit;
1148
1149
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_selector_cluster_indices, pOutput_selector_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t)))
1150
goto exit;
1151
1152
debug_printf("opencl_find_optimal_selector_clusters_for_each_block: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1153
1154
status = true;
1155
1156
exit:
1157
g_ocl.destroy_buffer(input_block_info);
1158
g_ocl.destroy_buffer(input_selectors);
1159
g_ocl.destroy_buffer(selector_cluster_indices);
1160
g_ocl.destroy_buffer(output_selector_cluster_indices);
1161
1162
return status;
1163
}
1164
1165
bool opencl_determine_selectors(
1166
opencl_context_ptr pContext,
1167
const color_rgba* pInput_etc_color5_and_inten,
1168
etc_block* pOutput_blocks,
1169
bool perceptual)
1170
{
1171
if (!opencl_is_available())
1172
return false;
1173
1174
interval_timer tm;
1175
tm.start();
1176
1177
assert(pContext->m_ocl_pixel_blocks);
1178
if (!pContext->m_ocl_pixel_blocks)
1179
return false;
1180
1181
assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
1182
1183
ds_param_struct ps;
1184
ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
1185
ps.m_perceptual = perceptual;
1186
1187
bool status = false;
1188
1189
cl_mem input_etc_color5_intens = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_etc_color5_and_inten, sizeof(color_rgba) * pContext->m_ocl_total_pixel_blocks);
1190
cl_mem output_blocks = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks);
1191
1192
if (!input_etc_color5_intens || !output_blocks)
1193
goto exit;
1194
1195
if (!g_ocl.set_kernel_args(pContext->m_ocl_determine_selectors_kernel, ps, pContext->m_ocl_pixel_blocks, input_etc_color5_intens, output_blocks))
1196
goto exit;
1197
1198
if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_determine_selectors_kernel, pContext->m_ocl_total_pixel_blocks, 1))
1199
goto exit;
1200
1201
if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_blocks, pOutput_blocks, pContext->m_ocl_total_pixel_blocks * sizeof(etc_block)))
1202
goto exit;
1203
1204
debug_printf("opencl_determine_selectors: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1205
1206
status = true;
1207
1208
exit:
1209
g_ocl.destroy_buffer(input_etc_color5_intens);
1210
g_ocl.destroy_buffer(output_blocks);
1211
1212
return status;
1213
}
1214
1215
#else
1216
namespace basisu
1217
{
1218
// No OpenCL support - all dummy functions that return false;
1219
bool opencl_init(bool force_serialization)
1220
{
1221
BASISU_NOTE_UNUSED(force_serialization);
1222
1223
return false;
1224
}
1225
1226
void opencl_deinit()
1227
{
1228
}
1229
1230
bool opencl_is_available()
1231
{
1232
return false;
1233
}
1234
1235
opencl_context_ptr opencl_create_context()
1236
{
1237
return nullptr;
1238
}
1239
1240
void opencl_destroy_context(opencl_context_ptr context)
1241
{
1242
BASISU_NOTE_UNUSED(context);
1243
}
1244
1245
bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
1246
{
1247
BASISU_NOTE_UNUSED(pContext);
1248
BASISU_NOTE_UNUSED(total_blocks);
1249
BASISU_NOTE_UNUSED(pPixel_blocks);
1250
1251
return false;
1252
}
1253
1254
bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms)
1255
{
1256
BASISU_NOTE_UNUSED(pContext);
1257
BASISU_NOTE_UNUSED(pOutput_blocks);
1258
BASISU_NOTE_UNUSED(perceptual);
1259
BASISU_NOTE_UNUSED(total_perms);
1260
1261
return false;
1262
}
1263
1264
bool opencl_encode_etc1s_pixel_clusters(
1265
opencl_context_ptr pContext,
1266
etc_block* pOutput_blocks,
1267
uint32_t total_clusters,
1268
const cl_pixel_cluster* pClusters,
1269
uint64_t total_pixels,
1270
const color_rgba* pPixels, const uint32_t *pPixel_weights,
1271
bool perceptual, uint32_t total_perms)
1272
{
1273
BASISU_NOTE_UNUSED(pContext);
1274
BASISU_NOTE_UNUSED(pOutput_blocks);
1275
BASISU_NOTE_UNUSED(total_clusters);
1276
BASISU_NOTE_UNUSED(pClusters);
1277
BASISU_NOTE_UNUSED(total_pixels);
1278
BASISU_NOTE_UNUSED(pPixels);
1279
BASISU_NOTE_UNUSED(pPixel_weights);
1280
BASISU_NOTE_UNUSED(perceptual);
1281
BASISU_NOTE_UNUSED(total_perms);
1282
1283
return false;
1284
}
1285
1286
bool opencl_refine_endpoint_clusterization(
1287
opencl_context_ptr pContext,
1288
const cl_block_info_struct* pPixel_block_info,
1289
uint32_t total_clusters,
1290
const cl_endpoint_cluster_struct* pCluster_info,
1291
const uint32_t* pSorted_block_indices,
1292
uint32_t* pOutput_cluster_indices,
1293
bool perceptual)
1294
{
1295
BASISU_NOTE_UNUSED(pContext);
1296
BASISU_NOTE_UNUSED(pPixel_block_info);
1297
BASISU_NOTE_UNUSED(total_clusters);
1298
BASISU_NOTE_UNUSED(pCluster_info);
1299
BASISU_NOTE_UNUSED(pSorted_block_indices);
1300
BASISU_NOTE_UNUSED(pOutput_cluster_indices);
1301
BASISU_NOTE_UNUSED(perceptual);
1302
1303
return false;
1304
}
1305
1306
bool opencl_find_optimal_selector_clusters_for_each_block(
1307
opencl_context_ptr pContext,
1308
const fosc_block_struct* pInput_block_info, // one per block
1309
uint32_t total_input_selectors,
1310
const fosc_selector_struct* pInput_selectors,
1311
const uint32_t* pSelector_cluster_indices,
1312
uint32_t* pOutput_selector_cluster_indices, // one per block
1313
bool perceptual)
1314
{
1315
BASISU_NOTE_UNUSED(pContext);
1316
BASISU_NOTE_UNUSED(pInput_block_info);
1317
BASISU_NOTE_UNUSED(total_input_selectors);
1318
BASISU_NOTE_UNUSED(pInput_selectors);
1319
BASISU_NOTE_UNUSED(pSelector_cluster_indices);
1320
BASISU_NOTE_UNUSED(pOutput_selector_cluster_indices);
1321
BASISU_NOTE_UNUSED(perceptual);
1322
1323
return false;
1324
}
1325
1326
bool opencl_determine_selectors(
1327
opencl_context_ptr pContext,
1328
const color_rgba* pInput_etc_color5_and_inten,
1329
etc_block* pOutput_blocks,
1330
bool perceptual)
1331
{
1332
BASISU_NOTE_UNUSED(pContext);
1333
BASISU_NOTE_UNUSED(pInput_etc_color5_and_inten);
1334
BASISU_NOTE_UNUSED(pOutput_blocks);
1335
BASISU_NOTE_UNUSED(perceptual);
1336
1337
return false;
1338
}
1339
1340
#endif // BASISU_SUPPORT_OPENCL
1341
1342
} // namespace basisu
1343
1344