Line data Source code
1 : /**
2 : Copyright (c) 2021-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #include "XLVkDevice.h"
25 : #include "XLVkPipeline.h"
26 : #include "XLVkTextureSet.h"
27 : #include "XLVkLoop.h"
28 : #include "XLVkAllocator.h"
29 : #include "XLCoreFrameHandle.h"
30 : #include "XLCoreFrameRequest.h"
31 : #include "XLVkRenderPass.h"
32 :
33 : #ifndef XL_VKDEVICE_LOG
34 : #define XL_VKDEVICE_LOG(...)
35 : #endif
36 :
37 : namespace STAPPLER_VERSIONIZED stappler::xenolith::vk {
38 :
39 74456 : DeviceFrameHandle::~DeviceFrameHandle() {
40 37228 : if (!_valid) {
41 : #if XL_VK_FINALIZE_INVALID_FRAMES
42 : auto dev = (Device *)_device;
43 : dev->getTable()->vkDeviceWaitIdle(dev->getDevice());
44 : #endif
45 : }
46 37228 : _memPools.clear();
47 74456 : }
48 :
49 37228 : bool DeviceFrameHandle::init(Loop &loop, Device &device, Rc<FrameRequest> &&req, uint64_t gen) {
50 37228 : if (!core::FrameHandle::init(loop, device, move(req), gen)) {
51 0 : return false;
52 : }
53 :
54 37228 : _allocator = device.getAllocator();
55 37228 : return true;
56 : }
57 :
58 253859 : const Rc<DeviceMemoryPool> &DeviceFrameHandle::getMemPool(void *key) {
59 253859 : std::unique_lock<Mutex> lock(_mutex);
60 : // experimental: multiple pools feature is disabled, advanced memory mapping protection can replace it completely
61 253859 : auto v = _memPools.find((void *)nullptr);
62 253859 : if (v == _memPools.end()) {
63 37019 : v = _memPools.emplace((void *)nullptr, Rc<DeviceMemoryPool>::create(_allocator, _request->isPersistentMapping())).first;
64 : }
65 507717 : return v->second;
66 253859 : }
67 :
68 42 : Device::Device() { }
69 :
70 84 : Device::~Device() {
71 42 : if (_vkInstance && _device) {
72 42 : if (_allocator) {
73 42 : _allocator->invalidate(*this);
74 42 : _allocator = nullptr;
75 : }
76 :
77 42 : if (_textureSetLayout) {
78 42 : _textureSetLayout->invalidate(*this);
79 42 : _textureSetLayout = nullptr;
80 : }
81 :
82 42 : clearShaders();
83 42 : invalidateObjects();
84 :
85 42 : _table->vkDestroyDevice(_device, nullptr);
86 42 : delete _table;
87 :
88 42 : _device = nullptr;
89 42 : _table = nullptr;
90 : }
91 : XL_VKDEVICE_LOG("~Device");
92 84 : }
93 :
94 42 : bool Device::init(const vk::Instance *inst, DeviceInfo && info, const Features &features, const Vector<StringView> &extensions) {
95 42 : Set<uint32_t> uniqueQueueFamilies = { info.graphicsFamily.index, info.presentFamily.index, info.transferFamily.index, info.computeFamily.index };
96 :
97 294 : auto emplaceQueueFamily = [&, this] (DeviceInfo::QueueFamilyInfo &info, uint32_t count, QueueOperations preferred) {
98 294 : for (auto &it : _families) {
99 168 : if (it.index == info.index) {
100 42 : it.preferred |= preferred;
101 42 : it.count = std::min(it.count + count, std::min(info.count, uint32_t(std::thread::hardware_concurrency())));
102 42 : return;
103 : }
104 : }
105 126 : count = std::min(count, std::min(info.count, uint32_t(std::thread::hardware_concurrency())));
106 126 : _families.emplace_back(DeviceQueueFamily({ info.index, count, preferred, info.ops, info.minImageTransferGranularity}));
107 42 : };
108 :
109 42 : _presentMask = info.presentFamily.presentSurfaceMask;
110 :
111 42 : info.presentFamily.count = 1;
112 :
113 42 : emplaceQueueFamily(info.graphicsFamily, std::thread::hardware_concurrency(), QueueOperations::Graphics);
114 42 : emplaceQueueFamily(info.presentFamily, 1, QueueOperations::Present);
115 42 : emplaceQueueFamily(info.transferFamily, 2, QueueOperations::Transfer);
116 42 : emplaceQueueFamily(info.computeFamily, std::thread::hardware_concurrency(), QueueOperations::Compute);
117 :
118 42 : if (!setup(inst, info.device, info.properties, _families, features, extensions)) {
119 0 : return false;
120 : }
121 :
122 42 : if (!core::Device::init(inst)) {
123 0 : return false;
124 : }
125 :
126 42 : _vkInstance = inst;
127 42 : _info = move(info);
128 :
129 : if constexpr (s_printVkInfo) {
130 42 : log::verbose("Vk-Info", "Device info:\n", info.description());
131 : }
132 :
133 168 : for (auto &it : _families) {
134 126 : it.queues.reserve(it.count);
135 126 : it.pools.reserve(it.count);
136 546 : for (size_t i = 0; i < it.count; ++ i) {
137 420 : VkQueue queue = VK_NULL_HANDLE;
138 420 : getTable()->vkGetDeviceQueue(_device, it.index, i, &queue);
139 :
140 420 : it.queues.emplace_back(Rc<DeviceQueue>::create(*this, queue, it.index, it.ops));
141 420 : it.pools.emplace_back(Rc<CommandPool>::create(*this, it.index, it.preferred));
142 : }
143 : }
144 :
145 42 : _allocator = Rc<Allocator>::create(*this, _info.device, _info.features, _info.properties);
146 :
147 42 : auto maxDescriptors = _info.properties.device10.properties.limits.maxPerStageDescriptorSampledImages;
148 42 : auto maxResources = _info.properties.device10.properties.limits.maxPerStageResources - 16;
149 :
150 42 : auto imageLimit = std::min(maxResources, maxDescriptors);
151 42 : auto bufferLimit = std::min(info.properties.device10.properties.limits.maxPerStageDescriptorStorageBuffers, maxDescriptors);
152 42 : if (!_info.features.device10.features.shaderSampledImageArrayDynamicIndexing) {
153 0 : imageLimit = 1;
154 : }
155 42 : _textureLayoutImagesCount = imageLimit = std::min(imageLimit, config::MaxTextureSetImages) - 2;
156 42 : _textureLayoutBuffersCount = bufferLimit = std::min(bufferLimit, config::MaxBufferArrayObjects);
157 42 : _textureSetLayout = Rc<TextureSetLayout>::create(*this, imageLimit, bufferLimit);
158 :
159 : do {
160 : VkFormatProperties properties;
161 :
162 840 : auto addDepthFormat = [&, this] (VkFormat fmt) {
163 294 : _vkInstance->vkGetPhysicalDeviceFormatProperties(_info.device, fmt, &properties);
164 294 : _formats.emplace(fmt, properties);
165 294 : if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0) {
166 252 : _depthFormats.emplace_back(core::ImageFormat(fmt));
167 : }
168 294 : };
169 :
170 462 : auto addColorFormat = [&, this] (VkFormat fmt) {
171 168 : _vkInstance->vkGetPhysicalDeviceFormatProperties(_info.device, fmt, &properties);
172 168 : _formats.emplace(fmt, properties);
173 168 : if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) != 0 && (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_TRANSFER_DST_BIT) != 0) {
174 126 : _colorFormats.emplace_back(core::ImageFormat(fmt));
175 : }
176 168 : };
177 :
178 42 : addDepthFormat(VK_FORMAT_D16_UNORM);
179 42 : addDepthFormat(VK_FORMAT_X8_D24_UNORM_PACK32);
180 42 : addDepthFormat(VK_FORMAT_D32_SFLOAT);
181 42 : addDepthFormat(VK_FORMAT_S8_UINT);
182 42 : addDepthFormat(VK_FORMAT_D16_UNORM_S8_UINT);
183 42 : addDepthFormat(VK_FORMAT_D24_UNORM_S8_UINT);
184 42 : addDepthFormat(VK_FORMAT_D32_SFLOAT_S8_UINT);
185 :
186 42 : addColorFormat(VK_FORMAT_R8_UNORM);
187 42 : addColorFormat(VK_FORMAT_R8G8_UNORM);
188 42 : addColorFormat(VK_FORMAT_R8G8B8_UNORM);
189 42 : addColorFormat(VK_FORMAT_R8G8B8A8_UNORM);
190 : } while (0);
191 :
192 42 : return true;
193 42 : }
194 :
195 63 : VkPhysicalDevice Device::getPhysicalDevice() const {
196 63 : return _info.device;
197 : }
198 :
199 42 : void Device::begin(Loop &loop, thread::TaskQueue &q, Function<void(bool)> &&cb) {
200 42 : compileSamplers(q, true);
201 42 : _textureSetLayout->compile(*this, _immutableSamplers);
202 42 : _textureSetLayout->initDefault(*this, loop, move(cb));
203 42 : _loopThreadId = std::this_thread::get_id();
204 42 : }
205 :
206 42 : void Device::end() {
207 168 : for (auto &it : _families) {
208 567 : for (auto &b : it.pools) {
209 441 : b->invalidate(*this);
210 : }
211 126 : it.pools.clear();
212 : }
213 :
214 42 : _finished = true;
215 :
216 126 : for (auto &it : _samplers) {
217 84 : it->invalidate();
218 : }
219 42 : _samplers.clear();
220 42 : }
221 :
222 : #if VK_HOOK_DEBUG
223 : static thread_local uint64_t s_vkFnCallStart = 0;
224 : #endif
225 :
226 112975246 : const DeviceTable * Device::getTable() const {
227 : #if VK_HOOK_DEBUG
228 : setDeviceHookThreadContext([] (void *ctx, const char *name, PFN_vkVoidFunction fn) {
229 : s_vkFnCallStart = platform::device::_clock();
230 : }, [] (void *ctx, const char *name, PFN_vkVoidFunction fn) {
231 : auto dt = platform::device::_clock() - s_vkFnCallStart;
232 : if (dt > 200000) {
233 : log::debug("Vk-Call-Timeout", name, ": ", dt);
234 : }
235 : }, _original, nullptr, (void *)this);
236 : #endif
237 :
238 112975246 : return _table;
239 : }
240 :
241 0 : const DeviceQueueFamily *Device::getQueueFamily(uint32_t familyIdx) const {
242 0 : for (auto &it : _families) {
243 0 : if (it.index == familyIdx) {
244 0 : return ⁢
245 : }
246 : }
247 0 : return nullptr;
248 : }
249 :
250 418669 : const DeviceQueueFamily *Device::getQueueFamily(QueueOperations ops) const {
251 1290876 : for (auto &it : _families) {
252 1250213 : if (it.preferred == ops) {
253 378006 : return ⁢
254 : }
255 : }
256 40663 : for (auto &it : _families) {
257 40663 : if ((it.ops & ops) != QueueOperations::None) {
258 40663 : return ⁢
259 : }
260 : }
261 0 : return nullptr;
262 : }
263 :
264 42 : const DeviceQueueFamily *Device::getQueueFamily(core::PassType type) const {
265 42 : switch (type) {
266 42 : case core::PassType::Graphics:
267 42 : return getQueueFamily(QueueOperations::Graphics);
268 : break;
269 0 : case core::PassType::Compute:
270 0 : return getQueueFamily(QueueOperations::Compute);
271 : break;
272 0 : case core::PassType::Transfer:
273 0 : return getQueueFamily(QueueOperations::Transfer);
274 : break;
275 0 : case core::PassType::Generic:
276 0 : return nullptr;
277 : break;
278 : }
279 0 : return nullptr;
280 : }
281 :
282 0 : const Vector<DeviceQueueFamily> &Device::getQueueFamilies() const {
283 0 : return _families;
284 : }
285 :
286 1483 : Rc<DeviceQueue> Device::tryAcquireQueueSync(QueueOperations ops, bool lockThread) {
287 1483 : auto family = (DeviceQueueFamily *)getQueueFamily(ops);
288 1483 : if (!family) {
289 0 : return nullptr;
290 : }
291 :
292 1483 : std::unique_lock<Mutex> lock(_resourceMutex);
293 1483 : if (lockThread) {
294 21 : ++ _resourceQueueWaiters;
295 21 : while (family->queues.empty()) {
296 0 : _resourceQueueCond.wait(lock);
297 : }
298 21 : -- _resourceQueueWaiters;
299 : }
300 1483 : if (!family->queues.empty()) {
301 : XL_VKDEVICE_LOG("tryAcquireQueueSync ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
302 1483 : auto queue = move(family->queues.back());
303 1483 : family->queues.pop_back();
304 1483 : return queue;
305 1483 : }
306 0 : return nullptr;
307 1483 : }
308 :
309 172577 : bool Device::acquireQueue(QueueOperations ops, FrameHandle &handle, Function<void(FrameHandle &, const Rc<DeviceQueue> &)> && acquire,
310 : Function<void(FrameHandle &)> && invalidate, Rc<Ref> &&ref) {
311 :
312 172577 : auto family = (DeviceQueueFamily *)getQueueFamily(ops);
313 172577 : if (!family) {
314 0 : return false;
315 : }
316 :
317 172577 : std::unique_lock<Mutex> lock(_resourceMutex);
318 172577 : Rc<DeviceQueue> queue;
319 172577 : if (!family->queues.empty()) {
320 172576 : queue = move(family->queues.back());
321 172576 : family->queues.pop_back();
322 : } else {
323 : XL_VKDEVICE_LOG("acquireQueue-wait ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
324 1 : family->waiters.emplace_back(DeviceQueueFamily::Waiter(move(acquire), move(invalidate), &handle, move(ref)));
325 : }
326 :
327 172577 : if (queue) {
328 172576 : queue->setOwner(handle);
329 : XL_VKDEVICE_LOG("acquireQueue ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
330 172576 : acquire(handle, queue);
331 : }
332 172577 : return true;
333 172577 : }
334 :
335 33339 : bool Device::acquireQueue(QueueOperations ops, Loop &loop, Function<void(Loop &, const Rc<DeviceQueue> &)> && acquire,
336 : Function<void(Loop &)> && invalidate, Rc<Ref> &&ref) {
337 :
338 33339 : auto family = (DeviceQueueFamily *)getQueueFamily(ops);
339 33339 : if (!family) {
340 0 : return false;
341 : }
342 :
343 33339 : std::unique_lock<Mutex> lock(_resourceMutex);
344 33339 : Rc<DeviceQueue> queue;
345 33339 : if (!family->queues.empty()) {
346 33339 : queue = move(family->queues.back());
347 33339 : family->queues.pop_back();
348 : } else {
349 : XL_VKDEVICE_LOG("acquireQueue-wait ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
350 0 : family->waiters.emplace_back(DeviceQueueFamily::Waiter(move(acquire), move(invalidate), &loop, move(ref)));
351 : }
352 :
353 33339 : lock.unlock();
354 :
355 33339 : if (queue) {
356 : XL_VKDEVICE_LOG("acquireQueue ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
357 33339 : acquire(loop, queue);
358 : }
359 33339 : return true;
360 33339 : }
361 :
362 207399 : void Device::releaseQueue(Rc<DeviceQueue> &&queue) {
363 207399 : DeviceQueueFamily *family = nullptr;
364 581639 : for (auto &it : _families) {
365 581639 : if (it.index == queue->getIndex()) {
366 207399 : family = ⁢
367 207399 : break;
368 : }
369 : }
370 :
371 207399 : if (!family) {
372 0 : return;
373 : }
374 :
375 207399 : queue->reset();
376 :
377 207399 : std::unique_lock<Mutex> lock(_resourceMutex);
378 : // Проверяем, есть ли синхронные ожидающие
379 207399 : if (_resourceQueueWaiters > 0) {
380 0 : family->queues.emplace_back(move(queue));
381 0 : _resourceQueueCond.notify_one();
382 0 : return;
383 : }
384 :
385 : // Проверяем, есть ли асинхронные ожидающие
386 207399 : if (family->waiters.empty()) {
387 : XL_VKDEVICE_LOG("releaseQueue ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
388 207398 : family->queues.emplace_back(move(queue));
389 : } else {
390 1 : if (family->waiters.front().handle) {
391 1 : Rc<FrameHandle> handle;
392 1 : Rc<Ref> ref;
393 1 : Function<void(FrameHandle &, const Rc<DeviceQueue> &)> cb;
394 1 : Function<void(FrameHandle &)> invalidate;
395 :
396 1 : cb = move(family->waiters.front().acquireForFrame);
397 1 : invalidate = move(family->waiters.front().releaseForFrame);
398 1 : ref = move(family->waiters.front().ref);
399 1 : handle = move(family->waiters.front().handle);
400 1 : family->waiters.erase(family->waiters.begin());
401 :
402 1 : lock.unlock();
403 :
404 1 : if (handle && handle->isValid()) {
405 1 : if (cb) {
406 1 : queue->setOwner(*handle);
407 : XL_VKDEVICE_LOG("release-acquireQueue ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
408 1 : cb(*handle, queue);
409 : }
410 0 : } else if (invalidate) {
411 : XL_VKDEVICE_LOG("invalidate ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
412 0 : invalidate(*handle);
413 : }
414 :
415 1 : handle = nullptr;
416 1 : } else if (family->waiters.front().loop) {
417 0 : Rc<Loop> loop;
418 0 : Rc<Ref> ref;
419 0 : Function<void(Loop &, const Rc<DeviceQueue> &)> cb;
420 0 : Function<void(Loop &)> invalidate;
421 :
422 0 : cb = move(family->waiters.front().acquireForLoop);
423 0 : invalidate = move(family->waiters.front().releaseForLoop);
424 0 : ref = move(family->waiters.front().ref);
425 0 : loop = move(family->waiters.front().loop);
426 0 : family->waiters.erase(family->waiters.begin());
427 :
428 0 : lock.unlock();
429 :
430 0 : if (loop && loop->isRunning()) {
431 0 : if (cb) {
432 : XL_VKDEVICE_LOG("release-acquireQueue ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
433 0 : cb(*loop, queue);
434 : }
435 0 : } else if (invalidate) {
436 : XL_VKDEVICE_LOG("invalidate ", family->index, " (", family->count, ") ", getQueueOperationsDesc(family->ops));
437 0 : invalidate(*loop);
438 : }
439 :
440 0 : loop = nullptr;
441 0 : }
442 : }
443 :
444 207399 : queue = nullptr;
445 207399 : }
446 :
447 197903 : Rc<CommandPool> Device::acquireCommandPool(QueueOperations c, uint32_t) {
448 197903 : auto family = (DeviceQueueFamily *)getQueueFamily(c);
449 197903 : if (!family) {
450 0 : return nullptr;
451 : }
452 :
453 197903 : std::unique_lock<Mutex> lock(_resourceMutex);
454 197903 : if (!family->pools.empty()) {
455 197882 : auto ret = family->pools.back();
456 197882 : family->pools.pop_back();
457 197882 : return ret;
458 197882 : }
459 21 : lock.unlock();
460 21 : return Rc<CommandPool>::create(*this, family->index, family->ops);
461 197903 : }
462 :
463 0 : Rc<CommandPool> Device::acquireCommandPool(uint32_t familyIndex) {
464 0 : auto family = (DeviceQueueFamily *)getQueueFamily(familyIndex);
465 0 : if (!family) {
466 0 : return nullptr;
467 : }
468 :
469 0 : std::unique_lock<Mutex> lock(_resourceMutex);
470 0 : if (!family->pools.empty()) {
471 0 : auto ret = family->pools.back();
472 0 : family->pools.pop_back();
473 0 : return ret;
474 0 : }
475 0 : lock.unlock();
476 0 : return Rc<CommandPool>::create(*this, family->index, family->ops);
477 0 : }
478 :
479 197882 : void Device::releaseCommandPool(core::Loop &loop, Rc<CommandPool> &&pool) {
480 197882 : pool->reset(*this, true);
481 :
482 : /*auto idx = pool->getFamilyIdx();
483 : std::unique_lock<Mutex> lock(_resourceMutex);
484 : for (auto &it : _families) {
485 : if (it.index == idx) {
486 : it.pools.emplace_back(move(pool));
487 : break;
488 : }
489 : }*/
490 :
491 197882 : auto refId = retain();
492 197882 : loop.performInQueue(Rc<thread::Task>::create([this, pool = Rc<CommandPool>(pool)] (const thread::Task &) -> bool {
493 197882 : pool->reset(*this);
494 197882 : return true;
495 791528 : }, [this, pool = Rc<CommandPool>(pool), refId] (const thread::Task &, bool success) mutable {
496 197882 : if (success) {
497 197882 : auto idx = pool->getFamilyIdx();
498 197882 : std::unique_lock<Mutex> lock(_resourceMutex);
499 572122 : for (auto &it : _families) {
500 572122 : if (it.index == idx) {
501 197882 : it.pools.emplace_back(move(pool));
502 197882 : break;
503 : }
504 : }
505 197882 : }
506 197882 : release(refId);
507 395764 : }, this));
508 197882 : }
509 :
510 21 : void Device::releaseCommandPoolUnsafe(Rc<CommandPool> &&pool) {
511 21 : pool->reset(*this);
512 :
513 21 : std::unique_lock<Mutex> lock(_resourceMutex);
514 84 : for (auto &it : _families) {
515 63 : if (it.index == pool->getFamilyIdx()) {
516 21 : it.pools.emplace_back(Rc<CommandPool>(pool));
517 : }
518 : }
519 21 : }
520 :
521 210 : static BytesView Device_emplaceConstant(Bytes &data, BytesView constant) {
522 210 : auto originalSize = data.size();
523 210 : auto constantSize = constant.size();
524 210 : data.resize(originalSize + constantSize);
525 210 : memcpy(data.data() + originalSize, constant.data(), constantSize);
526 210 : return BytesView(data.data() + originalSize, constantSize);
527 : }
528 :
529 210 : BytesView Device::emplaceConstant(core::PredefinedConstant c, Bytes &data) const {
530 210 : uint32_t intData = 0;
531 210 : switch (c) {
532 84 : case core::PredefinedConstant::SamplersArraySize:
533 84 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&_samplersCount, sizeof(uint32_t)));
534 : break;
535 0 : case core::PredefinedConstant::SamplersDescriptorIdx:
536 0 : intData = 0;
537 0 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&intData, sizeof(uint32_t)));
538 : break;
539 63 : case core::PredefinedConstant::TexturesArraySize:
540 63 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&_textureSetLayout->getImageCount(), sizeof(uint32_t)));
541 : break;
542 0 : case core::PredefinedConstant::TexturesDescriptorIdx:
543 0 : intData = 1;
544 0 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&intData, sizeof(uint32_t)));
545 : break;
546 63 : case core::PredefinedConstant::BuffersArraySize:
547 63 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&_textureSetLayout->getBuffersCount(), sizeof(uint32_t)));
548 : break;
549 0 : case core::PredefinedConstant::BuffersDescriptorIdx:
550 0 : intData = 2;
551 0 : return Device_emplaceConstant(data, BytesView((const uint8_t *)&intData, sizeof(uint32_t)));
552 : break;
553 0 : case core::PredefinedConstant::CurrentSamplerIdx:
554 0 : break;
555 : }
556 0 : return BytesView();
557 : }
558 :
559 441 : bool Device::supportsUpdateAfterBind(DescriptorType type) const {
560 441 : if (!_updateAfterBindEnabled) {
561 0 : return false;
562 : }
563 441 : switch (type) {
564 0 : case DescriptorType::Sampler:
565 0 : return true; // Samplers are immutable engine-wide
566 : break;
567 0 : case DescriptorType::CombinedImageSampler:
568 0 : return _info.features.deviceDescriptorIndexing.descriptorBindingSampledImageUpdateAfterBind;
569 : break;
570 21 : case DescriptorType::SampledImage:
571 21 : return _info.features.deviceDescriptorIndexing.descriptorBindingSampledImageUpdateAfterBind;
572 : break;
573 42 : case DescriptorType::StorageImage:
574 42 : return _info.features.deviceDescriptorIndexing.descriptorBindingStorageImageUpdateAfterBind;
575 : break;
576 0 : case DescriptorType::UniformTexelBuffer:
577 0 : return _info.features.deviceDescriptorIndexing.descriptorBindingUniformTexelBufferUpdateAfterBind;
578 : break;
579 0 : case DescriptorType::StorageTexelBuffer:
580 0 : return _info.features.deviceDescriptorIndexing.descriptorBindingStorageTexelBufferUpdateAfterBind;
581 : break;
582 63 : case DescriptorType::UniformBuffer:
583 : case DescriptorType::UniformBufferDynamic:
584 63 : return _info.features.deviceDescriptorIndexing.descriptorBindingUniformBufferUpdateAfterBind;
585 : break;
586 294 : case DescriptorType::StorageBuffer:
587 : case DescriptorType::StorageBufferDynamic:
588 294 : return _info.features.deviceDescriptorIndexing.descriptorBindingStorageBufferUpdateAfterBind;
589 : break;
590 21 : case DescriptorType::InputAttachment:
591 : case DescriptorType::Attachment:
592 : case DescriptorType::Unknown:
593 21 : return false;
594 : break;
595 : }
596 0 : return false;
597 : }
598 :
599 84 : Rc<core::ImageObject> Device::getEmptyImageObject() const {
600 168 : return _textureSetLayout->getEmptyImageObject();
601 : }
602 :
603 84 : Rc<core::ImageObject> Device::getSolidImageObject() const {
604 168 : return _textureSetLayout->getSolidImageObject();
605 : }
606 :
607 117 : Rc<core::Framebuffer> Device::makeFramebuffer(const core::QueuePassData *pass, SpanView<Rc<core::ImageView>> views) {
608 234 : return Rc<Framebuffer>::create(*this, (RenderPass *)pass->impl.get(), views);
609 : }
610 :
611 105 : auto Device::makeImage(const ImageInfoData &imageInfo) -> Rc<ImageStorage> {
612 105 : bool isTransient = (imageInfo.usage & core::ImageUsage::TransientAttachment) != core::ImageUsage::None;
613 :
614 : auto img = _allocator->spawnPersistent(
615 : isTransient ? AllocationUsage::DeviceLocalLazilyAllocated : AllocationUsage::DeviceLocal,
616 105 : imageInfo, false);
617 :
618 210 : return Rc<ImageStorage>::create(move(img));
619 105 : }
620 :
621 126189 : Rc<core::Semaphore> Device::makeSemaphore() {
622 126189 : auto ret = Rc<Semaphore>::create(*this);
623 252378 : return ret;
624 126189 : }
625 :
626 126 : Rc<core::ImageView> Device::makeImageView(const Rc<core::ImageObject> &img, const ImageViewInfo &info) {
627 126 : auto ret = Rc<ImageView>::create(*this, (Image *)img.get(), info);
628 252 : return ret;
629 126 : }
630 :
631 21 : bool Device::hasNonSolidFillMode() const {
632 21 : return _info.features.device10.features.fillModeNonSolid;
633 : }
634 :
635 11650 : bool Device::hasDynamicIndexedBuffers() const {
636 11650 : return _info.features.device10.features.shaderStorageBufferArrayDynamicIndexing;
637 : }
638 :
639 42 : void Device::waitIdle() const {
640 42 : _table->vkDeviceWaitIdle(_device);
641 42 : }
642 :
643 42 : void Device::compileImage(const Loop &loop, const Rc<core::DynamicImage> &img, Function<void(bool)> &&cb) {
644 : struct CompileImageTask : public Ref {
645 : Function<void(bool)> callback;
646 : Rc<core::DynamicImage> image;
647 : Rc<Loop> loop;
648 : Rc<Device> device;
649 :
650 : Rc<Buffer> transferBuffer;
651 : Rc<Image> resultImage;
652 : Rc<CommandPool> pool;
653 : Rc<DeviceQueue> queue;
654 : Rc<Fence> fence;
655 : };
656 :
657 84 : auto task = new CompileImageTask();
658 42 : task->callback = move(cb);
659 42 : task->image = img;
660 42 : task->loop = (Loop *)&loop;
661 42 : task->device = this;
662 :
663 42 : loop.performInQueue([this, task] () {
664 : // make transfer buffer
665 :
666 42 : task->image->acquireData([&] (BytesView view) {
667 126 : task->transferBuffer = task->device->getAllocator()->spawnPersistent(AllocationUsage::HostTransitionSource,
668 126 : BufferInfo(core::ForceBufferUsage(core::BufferUsage::TransferSrc), core::PassType::Transfer), view);
669 42 : });
670 :
671 42 : task->resultImage = task->device->getAllocator()->spawnPersistent(AllocationUsage::DeviceLocal, task->image->getInfo(), false);
672 :
673 42 : if (!task->transferBuffer) {
674 0 : task->loop->performOnGlThread([task] {
675 0 : task->callback(false);
676 0 : task->release(0);
677 0 : });
678 0 : return;
679 : }
680 :
681 42 : task->loop->performOnGlThread([this, task] {
682 42 : task->device->acquireQueue(QueueOperations::Transfer, *task->loop, [this, task] (Loop &loop, const Rc<DeviceQueue> &queue) {
683 42 : task->fence = loop.acquireFence(0);
684 42 : task->pool = task->device->acquireCommandPool(QueueOperations::Transfer);
685 42 : task->queue = move(queue);
686 :
687 42 : auto refId = task->retain();
688 42 : task->fence->addRelease([task, refId] (bool) {
689 42 : task->device->releaseCommandPool(*task->loop, move(task->pool));
690 42 : task->transferBuffer->dropPendingBarrier(); // hold reference while commands is active
691 42 : task->release(refId);
692 42 : }, this, "TextureSetLayout::compileImage transferBuffer->dropPendingBarrier");
693 :
694 42 : loop.performInQueue(Rc<thread::Task>::create([this, task] (const thread::Task &) -> bool {
695 42 : auto buf = task->pool->recordBuffer(*task->device, [&, this] (CommandBuffer &buf) {
696 42 : auto f = getQueueFamily(task->resultImage->getInfo().type);
697 42 : buf.writeImageTransfer(task->pool->getFamilyIdx(), f ? f->index : VK_QUEUE_FAMILY_IGNORED,
698 42 : task->transferBuffer, task->resultImage);
699 42 : return true;
700 : });
701 :
702 42 : if (task->queue->submit(*task->fence, buf)) {
703 42 : return true;
704 : }
705 0 : return false;
706 420 : }, [task] (const thread::Task &, bool success) {
707 42 : if (task->queue) {
708 42 : task->device->releaseQueue(move(task->queue));
709 : }
710 42 : if (success) {
711 42 : task->image->setImage(task->resultImage.get());
712 42 : task->callback(true);
713 : } else {
714 0 : task->callback(false);
715 : }
716 42 : task->fence->schedule(*task->loop);
717 42 : task->fence = nullptr;
718 42 : task->release(0);
719 42 : }));
720 42 : }, [task] (Loop &) {
721 0 : task->callback(false);
722 0 : task->release(0);
723 0 : });
724 42 : });
725 : }, (Loop *)&loop);
726 42 : }
727 :
728 42 : void Device::compileSamplers(thread::TaskQueue &q, bool force) {
729 42 : _immutableSamplers.resize(_samplersInfo.size(), VK_NULL_HANDLE);
730 42 : _samplers.resize(_samplersInfo.size(), nullptr);
731 42 : _samplersCount = _samplersInfo.size();
732 :
733 42 : size_t i = 0;
734 126 : for (auto &it : _samplersInfo) {
735 84 : auto objIt = _samplers.data() + i;
736 84 : auto glIt = _immutableSamplers.data() + i;
737 84 : q.perform(Rc<thread::Task>::create([this, objIt, glIt, v = &it] (const thread::Task &) -> bool {
738 81 : *objIt = Rc<Sampler>::create(*this, *v);
739 84 : *glIt = (*objIt)->getSampler();
740 84 : return true;
741 126 : }, [this] (const thread::Task &, bool) {
742 84 : ++ _compiledSamplers;
743 84 : if (_compiledSamplers == _samplersCount) {
744 42 : _samplersCompiled = true;
745 : }
746 84 : }));
747 :
748 84 : ++ i;
749 : }
750 42 : if (force) {
751 42 : q.waitForAll();
752 : }
753 42 : }
754 :
755 42 : bool Device::setup(const Instance *instance, VkPhysicalDevice p, const Properties &prop,
756 : const Vector<DeviceQueueFamily> &queueFamilies, const Features &f, const Vector<StringView> &ext) {
757 42 : _enabledFeatures = f;
758 :
759 42 : Vector<const char *> requiredExtension;
760 42 : requiredExtension.reserve(ext.size());
761 105 : for (auto &it : ext) {
762 63 : requiredExtension.emplace_back(it.data());
763 : }
764 :
765 42 : Vector<VkDeviceQueueCreateInfo> queueCreateInfos;
766 :
767 42 : uint32_t maxQueues = 0;
768 168 : for (auto &it : queueFamilies) {
769 126 : maxQueues = std::max(it.count, maxQueues);
770 : }
771 :
772 42 : Vector<float> queuePriority;
773 42 : queuePriority.resize(maxQueues, 1.0f);
774 :
775 168 : for (auto & queueFamily : queueFamilies) {
776 126 : VkDeviceQueueCreateInfo queueCreateInfo = { };
777 126 : queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
778 126 : queueCreateInfo.queueFamilyIndex = queueFamily.index;
779 126 : queueCreateInfo.queueCount = queueFamily.count;
780 126 : queueCreateInfo.pQueuePriorities = queuePriority.data();
781 126 : queueCreateInfos.push_back(queueCreateInfo);
782 : }
783 :
784 42 : VkDeviceCreateInfo deviceCreateInfo = { };
785 42 : deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
786 42 : void *next = nullptr;
787 : #ifdef VK_ENABLE_BETA_EXTENSIONS
788 : if ((features.flags & ExtensionFlags::Portability) != ExtensionFlags::None) {
789 : features.devicePortability.pNext = next;
790 : next = &features.devicePortability;
791 : }
792 : #endif
793 : #if VK_VERSION_1_3
794 42 : if (prop.device10.properties.apiVersion >= VK_API_VERSION_1_3) {
795 42 : _enabledFeatures.device13.pNext = next;
796 42 : _enabledFeatures.device12.pNext = &_enabledFeatures.device13;
797 42 : _enabledFeatures.device11.pNext = &_enabledFeatures.device12;
798 42 : _enabledFeatures.device10.pNext = &_enabledFeatures.device11;
799 42 : deviceCreateInfo.pNext = &_enabledFeatures.device11;
800 : } else
801 : #endif
802 0 : if (prop.device10.properties.apiVersion >= VK_API_VERSION_1_2) {
803 0 : _enabledFeatures.device12.pNext = next;
804 0 : _enabledFeatures.device11.pNext = &_enabledFeatures.device12;
805 0 : _enabledFeatures.device10.pNext = &_enabledFeatures.device11;
806 0 : deviceCreateInfo.pNext = &_enabledFeatures.device11;
807 : } else {
808 0 : if ((_enabledFeatures.flags & ExtensionFlags::Storage16Bit) != ExtensionFlags::None) {
809 0 : _enabledFeatures.device16bitStorage.pNext = next;
810 0 : next = &_enabledFeatures.device16bitStorage;
811 : }
812 0 : if ((_enabledFeatures.flags & ExtensionFlags::Storage8Bit) != ExtensionFlags::None) {
813 0 : _enabledFeatures.device8bitStorage.pNext = next;
814 0 : next = &_enabledFeatures.device8bitStorage;
815 : }
816 0 : if ((_enabledFeatures.flags & ExtensionFlags::ShaderFloat16) != ExtensionFlags::None || (_enabledFeatures.flags & ExtensionFlags::ShaderInt8) != ExtensionFlags::None) {
817 0 : _enabledFeatures.deviceShaderFloat16Int8.pNext = next;
818 0 : next = &_enabledFeatures.deviceShaderFloat16Int8;
819 : }
820 0 : if ((_enabledFeatures.flags & ExtensionFlags::DescriptorIndexing) != ExtensionFlags::None) {
821 0 : _enabledFeatures.deviceDescriptorIndexing.pNext = next;
822 0 : next = &_enabledFeatures.deviceDescriptorIndexing;
823 : }
824 0 : if ((_enabledFeatures.flags & ExtensionFlags::DeviceAddress) != ExtensionFlags::None) {
825 0 : _enabledFeatures.deviceBufferDeviceAddress.pNext = next;
826 0 : next = &_enabledFeatures.deviceBufferDeviceAddress;
827 : }
828 0 : deviceCreateInfo.pNext = next;
829 : }
830 42 : deviceCreateInfo.queueCreateInfoCount = static_cast<uint32_t>(queueCreateInfos.size());
831 42 : deviceCreateInfo.pQueueCreateInfos = queueCreateInfos.data();
832 42 : deviceCreateInfo.pEnabledFeatures = &_enabledFeatures.device10.features;
833 42 : deviceCreateInfo.enabledExtensionCount = static_cast<uint32_t>(requiredExtension.size());
834 42 : deviceCreateInfo.ppEnabledExtensionNames = requiredExtension.data();
835 :
836 : if constexpr (s_enableValidationLayers) {
837 42 : deviceCreateInfo.enabledLayerCount = static_cast<uint32_t>(sizeof(s_validationLayers) / sizeof(const char *));
838 42 : deviceCreateInfo.ppEnabledLayerNames = s_validationLayers;
839 : } else {
840 : deviceCreateInfo.enabledLayerCount = 0;
841 : }
842 :
843 42 : if (instance->vkCreateDevice(p, &deviceCreateInfo, nullptr, &_device) != VK_SUCCESS) {
844 0 : return false;
845 : }
846 :
847 : #if VK_HOOK_DEBUG
848 : auto hookTable = new DeviceTable(DeviceTable::makeHooks());
849 : _original = new DeviceTable(instance->vkGetDeviceProcAddr, _device);
850 : _table = hookTable;
851 : #else
852 42 : _table = new DeviceTable(instance->vkGetDeviceProcAddr, _device);
853 : #endif
854 :
855 42 : return true;
856 42 : }
857 :
858 : }
|