From 38bea1c86a1b3cb9f666f95fa087da575f87ef02 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 20 Jun 2023 11:44:14 +0200 Subject: [PATCH] vkd3d: Add implementation of a queue timeline. The purpose of this is to get a high-level glance of: - CPU / GPU overlap - Latency - Being able to correlate performance against events which may lead to stutter such as resource allocations, etc. The format is the trivial JSON format supported by Chromium chrome://tracing. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 142 +++++++-- libs/vkd3d/device.c | 10 +- libs/vkd3d/heap.c | 3 + libs/vkd3d/memory.c | 2 +- libs/vkd3d/meson.build | 3 +- libs/vkd3d/queue_timeline.c | 563 ++++++++++++++++++++++++++++++++++++ libs/vkd3d/resource.c | 3 + libs/vkd3d/swapchain.c | 28 ++ libs/vkd3d/vkd3d_private.h | 152 +++++++++- 9 files changed, 864 insertions(+), 42 deletions(-) create mode 100644 libs/vkd3d/queue_timeline.c diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index a041bcf8f0..65ce990604 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -26,7 +26,7 @@ #include "vkd3d_renderdoc.h" #endif -static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value); +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, struct vkd3d_fence_worker *worker, uint64_t value); static void d3d12_command_queue_add_submission(struct d3d12_command_queue *queue, const struct d3d12_command_queue_submission *sub); static void d3d12_fence_inc_ref(struct d3d12_fence *fence); @@ -236,6 +236,7 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device) pthread_mutex_destroy(&queue->mutex); vkd3d_free(queue->wait_semaphores); + vkd3d_free(queue->wait_values_virtual); vkd3d_free(queue->wait_fences); vkd3d_free(queue); } @@ -271,7 +272,8 @@ void vkd3d_queue_release(struct vkd3d_queue *queue) pthread_mutex_unlock(&queue->mutex); } -void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, VkSemaphore semaphore, uint64_t value) +void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, VkSemaphore semaphore, + uint64_t value, uint64_t virtual_value) { VkSemaphoreSubmitInfo *wait_semaphore; uint32_t i; @@ -292,7 +294,9 @@ void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, if (!vkd3d_array_reserve((void**)&queue->wait_semaphores, &queue->wait_semaphores_size, queue->wait_count + 1, sizeof(*queue->wait_semaphores)) || !vkd3d_array_reserve((void**)&queue->wait_fences, &queue->wait_fences_size, - queue->wait_count + 1, sizeof(*queue->wait_fences))) + queue->wait_count + 1, sizeof(*queue->wait_fences)) || + !vkd3d_array_reserve((void**)&queue->wait_values_virtual, &queue->wait_values_virtual_size, + queue->wait_count + 1, sizeof(*queue->wait_values_virtual))) { ERR("Failed to add semaphore wait to queue.\n"); pthread_mutex_unlock(&queue->mutex); @@ -308,6 +312,7 @@ void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, wait_semaphore->stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; queue->wait_fences[queue->wait_count] = waiter; + queue->wait_values_virtual[queue->wait_count] = virtual_value; queue->wait_count += 1; pthread_mutex_unlock(&queue->mutex); @@ -324,14 +329,11 @@ static void vkd3d_queue_reset_wait_count_locked(struct vkd3d_queue *vkd3d_queue) vkd3d_queue->wait_count = 0; } -static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, - d3d12_fence_iface *fence, VkSemaphore timeline, uint64_t value, bool signal, - struct d3d12_command_allocator **command_allocators, size_t num_command_allocators); - static void vkd3d_queue_push_waiters_to_worker_locked(struct vkd3d_queue *vkd3d_queue, struct vkd3d_fence_worker *worker, VkSemaphore timeline, uint64_t value) { + struct vkd3d_queue_timeline_trace_cookie cookie; HRESULT hr; size_t i; @@ -339,9 +341,12 @@ static void vkd3d_queue_push_waiters_to_worker_locked(struct vkd3d_queue *vkd3d_ { if (vkd3d_queue->wait_fences[i]) { + cookie = vkd3d_queue_timeline_trace_register_wait(&worker->device->queue_timeline_trace, + vkd3d_queue->wait_fences[i], vkd3d_queue->wait_values_virtual[i]); + if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(worker, vkd3d_queue->wait_fences[i], timeline, value, false, - NULL, 0))) + NULL, 0, &cookie))) { ERR("Failed to enqueue timeline semaphore.\n"); } @@ -468,9 +473,10 @@ static HRESULT vkd3d_create_timeline_semaphore(struct d3d12_device *device, uint return hresult_from_vk_result(vr); } -static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, +HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, d3d12_fence_iface *fence, VkSemaphore timeline, uint64_t value, bool signal, - struct d3d12_command_allocator **command_allocators, size_t num_command_allocators) + struct d3d12_command_allocator **command_allocators, size_t num_command_allocators, + const struct vkd3d_queue_timeline_trace_cookie *timeline_cookie) { struct vkd3d_waiting_fence *waiting_fence; size_t i; @@ -484,6 +490,11 @@ static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worke for (i = 0; i < num_command_allocators; i++) d3d12_command_allocator_dec_ref(command_allocators[i]); vkd3d_free(command_allocators); + if (timeline_cookie) + { + vkd3d_queue_timeline_trace_complete_execute(&worker->device->queue_timeline_trace, + NULL, *timeline_cookie); + } return hresult_from_errno(rc); } @@ -495,6 +506,11 @@ static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worke for (i = 0; i < num_command_allocators; i++) d3d12_command_allocator_dec_ref(command_allocators[i]); vkd3d_free(command_allocators); + if (timeline_cookie) + { + vkd3d_queue_timeline_trace_complete_execute(&worker->device->queue_timeline_trace, + NULL, *timeline_cookie); + } return E_OUTOFMEMORY; } @@ -508,6 +524,13 @@ static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worke waiting_fence->signal = signal; waiting_fence->command_allocators = command_allocators; waiting_fence->num_command_allocators = num_command_allocators; + if (timeline_cookie) + { + waiting_fence->timeline_cookie = *timeline_cookie; + vkd3d_queue_timeline_trace_begin_execute(&worker->device->queue_timeline_trace, *timeline_cookie); + } + else + memset(&waiting_fence->timeline_cookie, 0, sizeof(waiting_fence->timeline_cookie)); ++worker->enqueued_fence_count; pthread_cond_signal(&worker->cond); @@ -515,12 +538,15 @@ static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worke return S_OK; } -static void vkd3d_waiting_fence_release_submissions(const struct vkd3d_waiting_fence *fence) +static void vkd3d_waiting_fence_release_submissions(struct d3d12_device *device, + struct vkd3d_fence_worker *worker, + const struct vkd3d_waiting_fence *fence) { size_t i; for (i = 0; i < fence->num_command_allocators; i++) d3d12_command_allocator_dec_ref(fence->command_allocators[i]); vkd3d_free(fence->command_allocators); + vkd3d_queue_timeline_trace_complete_execute(&device->queue_timeline_trace, worker, fence->timeline_cookie); } static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *fence) @@ -553,7 +579,7 @@ static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *wor { ERR("Failed to wait for Vulkan timeline semaphore, vr %d.\n", vr); VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST || vr == VK_TIMEOUT); - vkd3d_waiting_fence_release_submissions(fence); + vkd3d_waiting_fence_release_submissions(device, worker, fence); return; } @@ -565,7 +591,7 @@ static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *wor { local_fence = impl_from_ID3D12Fence1(fence->fence); TRACE("Signaling fence %p value %#"PRIx64".\n", local_fence, fence->value); - if (FAILED(hr = d3d12_fence_signal(local_fence, fence->value))) + if (FAILED(hr = d3d12_fence_signal(local_fence, worker, fence->value))) ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); } @@ -576,7 +602,7 @@ static void vkd3d_wait_for_gpu_timeline_semaphore(struct vkd3d_fence_worker *wor * Such execute commands can be paired with a d3d12_fence_dec_ref(), * but no signalling operation. */ assert(!fence->num_command_allocators || !fence->signal); - vkd3d_waiting_fence_release_submissions(fence); + vkd3d_waiting_fence_release_submissions(device, worker, fence); } static void *vkd3d_fence_worker_main(void *arg) @@ -591,6 +617,16 @@ static void *vkd3d_fence_worker_main(void *arg) vkd3d_set_thread_name("vkd3d_fence"); + snprintf(worker->timeline.tid, sizeof(worker->timeline.tid), +#ifdef _WIN32 + "family %u, tid 0x%04x, prio %d", +#else + "family %u, tid %u, prio %d", +#endif + worker->queue->vkd3d_queue->vk_family_index, + vkd3d_get_current_thread_id(), + worker->queue->desc.Priority); + cur_fence_count = 0; cur_fences_size = 0; cur_fences = NULL; @@ -638,7 +674,8 @@ static void *vkd3d_fence_worker_main(void *arg) return NULL; } -HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, +static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + struct d3d12_command_queue *queue, struct d3d12_device *device) { int rc; @@ -647,6 +684,7 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, worker->should_exit = false; worker->device = device; + worker->queue = queue; worker->enqueued_fence_count = 0; worker->enqueued_fences = NULL; @@ -675,7 +713,7 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, return S_OK; } -HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, +static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device) { int rc; @@ -697,6 +735,7 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, pthread_cond_destroy(&worker->cond); vkd3d_free(worker->enqueued_fences); + vkd3d_free(worker->timeline.list_buffer); return S_OK; } @@ -774,7 +813,8 @@ static void d3d12_fence_dec_ref(struct d3d12_fence *fence) } } -static HRESULT vkd3d_waiting_event_signal(const struct vkd3d_waiting_event *event) +static HRESULT vkd3d_waiting_event_signal(struct d3d12_device *device, struct vkd3d_fence_worker *worker, + const struct vkd3d_waiting_event *event) { bool do_signal = false; uint32_t payload; @@ -821,6 +861,8 @@ static HRESULT vkd3d_waiting_event_signal(const struct vkd3d_waiting_event *even if (!do_signal) return S_FALSE; + vkd3d_queue_timeline_trace_complete_event_signal(&device->queue_timeline_trace, worker, event->timeline_cookie); + if (!vkd3d_native_sync_handle_is_valid(event->handle)) { *event->latch = true; @@ -835,7 +877,7 @@ static HRESULT vkd3d_waiting_event_signal(const struct vkd3d_waiting_event *even return hr; } -static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) +static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence, struct vkd3d_fence_worker *worker) { bool signal_null_event_cond = false; unsigned int i, j; @@ -846,7 +888,7 @@ static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) if (current->value <= fence->virtual_value) { - vkd3d_waiting_event_signal(current); + vkd3d_waiting_event_signal(fence->device, worker, current); if (!vkd3d_native_sync_handle_is_valid(current->handle)) signal_null_event_cond = true; @@ -935,7 +977,7 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen } fence->virtual_value = value; - d3d12_fence_signal_external_events_locked(fence); + d3d12_fence_signal_external_events_locked(fence, NULL); d3d12_fence_update_pending_value_locked(fence); pthread_mutex_unlock(&fence->mutex); return S_OK; @@ -977,7 +1019,7 @@ static uint64_t d3d12_fence_get_physical_wait_value_locked(struct d3d12_fence *f return target_physical_value; } -static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t physical_value) +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, struct vkd3d_fence_worker *worker, uint64_t physical_value) { bool did_signal; size_t i; @@ -1004,7 +1046,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t physical_v if (fence->physical_value == fence->pending_updates[i].physical_value) { fence->virtual_value = fence->pending_updates[i].virtual_value; - d3d12_fence_signal_external_events_locked(fence); + d3d12_fence_signal_external_events_locked(fence, worker); fence->pending_updates[i] = fence->pending_updates[--fence->pending_updates_count]; did_signal = true; break; @@ -1171,11 +1213,14 @@ static HRESULT d3d12_fence_set_native_sync_handle_on_completion_explicit(struct if (value <= fence->virtual_value) { - hr = vkd3d_waiting_event_signal(&event); + hr = vkd3d_waiting_event_signal(fence->device, NULL, &event); pthread_mutex_unlock(&fence->mutex); return hr; } + event.timeline_cookie = vkd3d_queue_timeline_trace_register_event_signal(&fence->device->queue_timeline_trace, + handle, &fence->ID3D12Fence_iface, value); + if (wait_type == VKD3D_WAITING_EVENT_SINGLE && vkd3d_native_sync_handle_is_valid(handle)) { for (i = 0; i < fence->event_count; ++i) @@ -1566,7 +1611,7 @@ static void *vkd3d_shared_fence_worker_main(void *userdata) { if (current->wait.value <= completed_value) { - vkd3d_waiting_event_signal(¤t->wait); + vkd3d_waiting_event_signal(fence->device, NULL, ¤t->wait); list_remove(¤t->entry); vkd3d_free(current); } @@ -1627,11 +1672,14 @@ static HRESULT d3d12_shared_fence_set_native_sync_handle_on_completion_explicit( if (completed_value >= value) { - vkd3d_waiting_event_signal(&event); + vkd3d_waiting_event_signal(fence->device, NULL, &event); return S_OK; } else { + event.timeline_cookie = vkd3d_queue_timeline_trace_register_event_signal(&fence->device->queue_timeline_trace, + handle, &fence->ID3D12Fence_iface, value); + if (!(waiting_event = vkd3d_malloc(sizeof(*waiting_event)))) { ERR("Failed to register device singleton for adapter."); @@ -1658,6 +1706,9 @@ static HRESULT d3d12_shared_fence_set_native_sync_handle_on_completion_explicit( } else { + event.timeline_cookie = vkd3d_queue_timeline_trace_register_event_signal(&fence->device->queue_timeline_trace, + handle, &fence->ID3D12Fence_iface, value); + wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; wait_info.pNext = NULL; wait_info.flags = 0; @@ -1671,6 +1722,9 @@ static HRESULT d3d12_shared_fence_set_native_sync_handle_on_completion_explicit( return E_FAIL; } + vkd3d_queue_timeline_trace_complete_event_signal(&fence->device->queue_timeline_trace, + NULL, event.timeline_cookie); + return S_OK; } } @@ -1929,6 +1983,7 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm #endif allocator->current_command_list = list; + list->timeline_cookie = vkd3d_queue_timeline_trace_register_command_list(&allocator->device->queue_timeline_trace); return S_OK; } @@ -2469,6 +2524,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo vk_procs = &device->vk_procs; d3d12_command_allocator_free_resources(allocator, true); + + vkd3d_queue_timeline_trace_register_instantaneous(&device->queue_timeline_trace, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_ALLOCATOR_RESET, allocator->command_buffer_count); + if (allocator->command_buffer_count) { VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, @@ -2507,6 +2566,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo allocator->query_pool_count = 0; memset(&allocator->active_query_pools, 0, sizeof(allocator->active_query_pools)); + return S_OK; } @@ -2576,7 +2636,6 @@ void d3d12_device_unmap_vkd3d_queue(struct d3d12_device *device, pthread_mutex_unlock(&device->mutex); } - static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allocator, struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { @@ -5235,6 +5294,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(d3d12_command_list_ifa } list->is_recording = false; + vkd3d_queue_timeline_trace_close_command_list(&list->device->queue_timeline_trace, list->timeline_cookie); if (!list->is_valid) { @@ -15710,6 +15770,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm UINT command_list_count, ID3D12CommandList * const *command_lists) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct vkd3d_queue_timeline_trace_cookie timeline_cookie; struct vkd3d_initial_transition *transitions; size_t num_transitions, num_command_buffers; VkCommandBufferSubmitInfo *buffers, *buffer; @@ -15778,6 +15839,10 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm breadcrumb_indices = NULL; #endif + timeline_cookie = vkd3d_queue_timeline_trace_register_execute( + &command_queue->device->queue_timeline_trace, + command_lists, command_list_count); + sub.execute.debug_capture = false; sub.execute.split_submission = false; @@ -15808,6 +15873,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm #ifdef VKD3D_ENABLE_BREADCRUMBS vkd3d_free(breadcrumb_indices); #endif + vkd3d_queue_timeline_trace_complete_execute(&command_queue->device->queue_timeline_trace, + NULL, timeline_cookie); return; } @@ -15905,6 +15972,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm sub.execute.breadcrumb_indices = breadcrumb_indices; sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0; #endif + sub.execute.timeline_cookie = timeline_cookie; d3d12_command_queue_add_submission(command_queue, &sub); } @@ -16125,12 +16193,13 @@ static void d3d12_command_queue_wait(struct d3d12_command_queue *command_queue, * This is also important, since we have to hold on to a private reference on the fence * until we have observed the wait to actually complete. */ assert(fence->timeline_semaphore); - vkd3d_queue_add_wait(command_queue->vkd3d_queue, &fence->ID3D12Fence_iface, fence->timeline_semaphore, wait_count); + vkd3d_queue_add_wait(command_queue->vkd3d_queue, &fence->ID3D12Fence_iface, fence->timeline_semaphore, wait_count, value); } static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, UINT64 value) { + struct vkd3d_queue_timeline_trace_cookie cookie; const struct vkd3d_vk_device_procs *vk_procs; VkSemaphoreSubmitInfo signal_semaphore_info; struct vkd3d_queue *vkd3d_queue; @@ -16190,8 +16259,11 @@ static void d3d12_command_queue_signal(struct d3d12_command_queue *command_queue VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); + cookie = vkd3d_queue_timeline_trace_register_signal(&command_queue->device->queue_timeline_trace, + &fence->ID3D12Fence_iface, value); + if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, &fence->ID3D12Fence_iface, - fence->timeline_semaphore, physical_value, true, NULL, 0))) + fence->timeline_semaphore, physical_value, true, NULL, 0, &cookie))) { ERR("Failed to enqueue timeline semaphore, hr #%x.\n", hr); } @@ -16285,7 +16357,7 @@ static void d3d12_command_queue_signal_shared(struct d3d12_command_queue *comman VKD3D_DEVICE_REPORT_BREADCRUMB_IF(command_queue->device, vr == VK_ERROR_DEVICE_LOST); if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, &fence->ID3D12Fence_iface, - vkd3d_queue->submission_timeline, vkd3d_queue->submission_timeline_count, true, NULL, 0))) + vkd3d_queue->submission_timeline, vkd3d_queue->submission_timeline_count, true, NULL, 0, NULL))) { ERR("Failed to enqueue timeline semaphore, hr #%x.\n", hr); } @@ -16570,6 +16642,7 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu const VkCommandBufferSubmitInfo *transition_cmd, const VkSemaphoreSubmitInfo *transition_semaphore, struct d3d12_command_allocator **command_allocators, size_t num_command_allocators, + struct vkd3d_queue_timeline_trace_cookie timeline_cookie, bool debug_capture, bool split_submissions) { const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs; @@ -16620,6 +16693,8 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu for (i = 0; i < num_command_allocators; i++) d3d12_command_allocator_dec_ref(command_allocators[i]); vkd3d_free(command_allocators); + vkd3d_queue_timeline_trace_complete_execute(&command_queue->device->queue_timeline_trace, + NULL, timeline_cookie); return; } @@ -16694,7 +16769,7 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu if (FAILED(hr = vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, NULL, vkd3d_queue->submission_timeline, signal_semaphore_info.value, false, - command_allocators, num_command_allocators))) + command_allocators, num_command_allocators, &timeline_cookie))) { ERR("Failed to enqueue timeline semaphore.\n"); } @@ -17087,6 +17162,7 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) VKD3D_REGION_DECL(queue_execute); vkd3d_set_thread_name("vkd3d_queue"); + queue->submission_thread_tid = vkd3d_get_current_thread_id(); if (FAILED(hr = d3d12_command_queue_transition_pool_init(&pool, queue))) ERR("Failed to initialize transition pool.\n"); @@ -17156,9 +17232,11 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata) &transition_cmd, &transition_semaphore, submission.execute.command_allocators, submission.execute.num_command_allocators, + submission.execute.timeline_cookie, submission.execute.debug_capture, submission.execute.split_submission); - /* command_queue_execute takes ownership of the outstanding_submission_counters allocation. + /* command_queue_execute takes ownership of the + * outstanding_submission_counters and queue_timeline_indices allocations. * The atomic counters are decremented when the submission is observed to be freed. * On error, the counters are freed early, so there is no risk of leak. */ vkd3d_free(submission.execute.cmd); @@ -17259,7 +17337,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, d3d12_device_add_ref(queue->device = device); - if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, device))) + if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue, device))) goto fail_fence_worker_start; if ((rc = pthread_create(&queue->submission_thread, NULL, d3d12_command_queue_submission_worker_main, queue)) < 0) diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index e39a9a041b..6d33968152 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -3278,6 +3278,7 @@ static void d3d12_device_destroy(struct d3d12_device *device) vkd3d_cleanup_format_info(device); vkd3d_memory_info_cleanup(&device->memory_info, device); + vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace); vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); #ifdef VKD3D_ENABLE_BREADCRUMBS vkd3d_breadcrumb_tracer_cleanup_barrier_hashes(&device->breadcrumb_tracer); @@ -8296,13 +8297,16 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_shader_debug_ring_init(&device->debug_ring, device))) goto out_cleanup_meta_ops; + if (FAILED(hr = vkd3d_queue_timeline_trace_init(&device->queue_timeline_trace))) + goto out_cleanup_debug_ring; + vkd3d_scratch_pool_init(device); #ifdef VKD3D_ENABLE_BREADCRUMBS vkd3d_breadcrumb_tracer_init_barrier_hashes(&device->breadcrumb_tracer); if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) if (FAILED(hr = vkd3d_breadcrumb_tracer_init(&device->breadcrumb_tracer, device))) - goto out_cleanup_debug_ring; + goto out_cleanup_queue_timeline_trace; #endif if (vkd3d_descriptor_debug_active_qa_checks()) @@ -8349,9 +8353,11 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, #ifdef VKD3D_ENABLE_BREADCRUMBS if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS) vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device); -out_cleanup_debug_ring: +out_cleanup_queue_timeline_trace: vkd3d_breadcrumb_tracer_cleanup_barrier_hashes(&device->breadcrumb_tracer); + vkd3d_queue_timeline_trace_cleanup(&device->queue_timeline_trace); #endif +out_cleanup_debug_ring: vkd3d_shader_debug_ring_cleanup(&device->debug_ring, device); out_cleanup_meta_ops: vkd3d_meta_ops_cleanup(&device->meta_ops, device); diff --git a/libs/vkd3d/heap.c b/libs/vkd3d/heap.c index f7818abbe4..cffcfd793b 100644 --- a/libs/vkd3d/heap.c +++ b/libs/vkd3d/heap.c @@ -317,6 +317,9 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev heap->allocation.chunk == NULL /* not suballocated */ && (device->memory_properties.memoryTypes[heap->allocation.device_allocation.vk_memory_type].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + vkd3d_queue_timeline_trace_register_instantaneous(&device->queue_timeline_trace, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_HEAP_ALLOCATION, desc->SizeInBytes); + d3d12_device_add_ref(heap->device); return S_OK; } diff --git a/libs/vkd3d/memory.c b/libs/vkd3d/memory.c index 22528a4108..417bf3c5c7 100644 --- a/libs/vkd3d/memory.c +++ b/libs/vkd3d/memory.c @@ -448,7 +448,7 @@ static HRESULT vkd3d_memory_transfer_queue_flush_locked(struct vkd3d_memory_tran vkd3d_queue_add_wait(queue_family->queues[i], NULL, queue->vk_semaphore, - queue->next_signal_value); + queue->next_signal_value, 0); } } diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build index 54955a89e8..a740f64763 100644 --- a/libs/vkd3d/meson.build +++ b/libs/vkd3d/meson.build @@ -62,7 +62,8 @@ vkd3d_src = [ 'vkd3d_main.c', 'raytracing_pipeline.c', 'acceleration_structure.c', - 'swapchain.c' + 'swapchain.c', + 'queue_timeline.c' ] if enable_renderdoc diff --git a/libs/vkd3d/queue_timeline.c b/libs/vkd3d/queue_timeline.c new file mode 100644 index 0000000000..7b5d62f9a0 --- /dev/null +++ b/libs/vkd3d/queue_timeline.c @@ -0,0 +1,563 @@ +/* + * Copyright 2023 Hans-Kristian Arntzen for Valve Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API +#include "vkd3d_private.h" +#include "vkd3d_platform.h" +#include "vkd3d_threads.h" +#include +#include + +#define NUM_ENTRIES (256 * 1024) + +HRESULT vkd3d_queue_timeline_trace_init(struct vkd3d_queue_timeline_trace *trace) +{ + char env[VKD3D_PATH_MAX]; + unsigned int i; + + if (!vkd3d_get_env_var("VKD3D_QUEUE_PROFILE", env, sizeof(env))) + return S_OK; + + trace->file = fopen(env, "w"); + if (trace->file) + { + INFO("Creating timeline trace in: \"%s\".\n", env); + fputs("[\n", trace->file); + } + else + return S_OK; + + pthread_mutex_init(&trace->lock, NULL); + pthread_mutex_init(&trace->ready_lock, NULL); + + vkd3d_array_reserve((void**)&trace->vacant_indices, &trace->vacant_indices_size, + NUM_ENTRIES, sizeof(*trace->vacant_indices)); + + /* Reserve entry 0 as sentinel. */ + for (i = 1; i < NUM_ENTRIES; i++) + trace->vacant_indices[trace->vacant_indices_count++] = i; + + trace->state = vkd3d_calloc(NUM_ENTRIES, sizeof(*trace->state)); + trace->base_ts = vkd3d_get_current_time_ns(); + + if (vkd3d_get_env_var("VKD3D_QUEUE_PROFILE_ABSOLUTE", env, sizeof(env)) && + env[0] == '1') + { + /* Wine logs are QPC relative */ + trace->base_ts = 0; + + /* Force an event at ts = 0 so the trace gets absolute time. */ + fprintf(trace->file, + "{ \"name\": \"dummy\", \"ph\": \"i\", \"tid\": \"0x%04x\", \"pid\": 0, \"ts\": 0.0 },\n", + vkd3d_get_current_thread_id()); + } + + trace->active = true; + return S_OK; +} + +static void vkd3d_queue_timeline_trace_free_index(struct vkd3d_queue_timeline_trace *trace, unsigned int index) +{ + assert(trace->state[index].type != VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_NONE); + trace->state[index].type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_NONE; + + pthread_mutex_lock(&trace->lock); + assert(trace->vacant_indices_count < trace->vacant_indices_size); + trace->vacant_indices[trace->vacant_indices_count++] = index; + pthread_mutex_unlock(&trace->lock); +} + +static void vkd3d_queue_timeline_trace_free_indices(struct vkd3d_queue_timeline_trace *trace, + const unsigned int *indices, size_t count) +{ + size_t i; + pthread_mutex_lock(&trace->lock); + + for (i = 0; i < count; i++) + { + assert(trace->state[indices[i]].type != VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_NONE); + trace->state[indices[i]].type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_NONE; + } + + assert(trace->vacant_indices_count + count <= trace->vacant_indices_size); + memcpy(trace->vacant_indices + trace->vacant_indices_count, indices, count * sizeof(*indices)); + trace->vacant_indices_count += count; + pthread_mutex_unlock(&trace->lock); +} + +static unsigned int vkd3d_queue_timeline_trace_allocate_index(struct vkd3d_queue_timeline_trace *trace, uint64_t *submit_count) +{ + unsigned int index = 0; + pthread_mutex_lock(&trace->lock); + if (trace->vacant_indices_count == 0) + { + ERR("Failed to allocate queue timeline index.\n"); + goto unlock; + } + index = trace->vacant_indices[--trace->vacant_indices_count]; +unlock: + if (submit_count) + *submit_count = ++trace->submit_count; + pthread_mutex_unlock(&trace->lock); + return index; +} + +void vkd3d_queue_timeline_trace_cleanup(struct vkd3d_queue_timeline_trace *trace) +{ + if (!trace->active) + return; + + pthread_mutex_destroy(&trace->lock); + pthread_mutex_destroy(&trace->ready_lock); + if (trace->file) + fclose(trace->file); + + vkd3d_free(trace->vacant_indices); + vkd3d_free(trace->ready_command_lists); + vkd3d_free(trace->state); +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_event_signal(struct vkd3d_queue_timeline_trace *trace, + vkd3d_native_sync_handle handle, d3d12_fence_iface *fence, uint64_t value) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, NULL); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_EVENT; + state->start_ts = vkd3d_get_current_time_ns(); + +#ifdef _WIN32 + snprintf(state->desc, sizeof(state->desc), "event: %p, fence: %p, value %"PRIu64, + handle.handle, (void*)fence, value); +#else + snprintf(state->desc, sizeof(state->desc), "event: %d, fence: %p, value %"PRIu64, + handle.fd, (void*)fence, value); +#endif + + return cookie; +} + +void vkd3d_queue_timeline_trace_complete_event_signal(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_fence_worker *worker, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + const struct vkd3d_queue_timeline_trace_state *state; + double end_ts, start_ts; + unsigned int pid; + + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3; + start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3; + + if (worker) + { + pid = worker->queue->submission_thread_tid; + if (start_ts < worker->timeline.lock_end_event_ts) + start_ts = worker->timeline.lock_end_event_ts; + if (end_ts < start_ts) + end_ts = start_ts; + worker->timeline.lock_end_event_ts = end_ts; + + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"event\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", + state->desc, pid, start_ts, end_ts - start_ts); + } + else + { + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"inline\", \"pid\": \"shared fence\", \"ts\": %f, \"dur\": %f },\n", + state->desc, start_ts, end_ts - start_ts); + } + + vkd3d_queue_timeline_trace_free_index(trace, cookie.index); +} + +void vkd3d_queue_timeline_trace_complete_present_wait(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + const struct vkd3d_queue_timeline_trace_state *state; + double end_ts, start_ts; + + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3; + start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3; + + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"wait\", \"pid\": \"present\", \"ts\": %f, \"dur\": %f },\n", + state->desc, start_ts, end_ts - start_ts); + + vkd3d_queue_timeline_trace_free_index(trace, cookie.index); +} + +void vkd3d_queue_timeline_trace_complete_present_block(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + const struct vkd3d_queue_timeline_trace_state *state; + double end_ts, start_ts; + + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3; + start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3; + + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"0x%04x\", \"pid\": \"present\", \"ts\": %f, \"dur\": %f },\n", + state->desc, state->tid, start_ts, end_ts - start_ts); + + vkd3d_queue_timeline_trace_free_index(trace, cookie.index); +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_execute(struct vkd3d_queue_timeline_trace *trace, + ID3D12CommandList * const *command_lists, unsigned int count) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + uint64_t submission_count; + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, &submission_count); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION; + state->start_ts = vkd3d_get_current_time_ns(); + snprintf(state->desc, sizeof(state->desc), "SUBMIT #%"PRIu64" (%u lists)", submission_count, count); + + /* Might be useful later. */ + (void)command_lists; + + return cookie; +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_signal(struct vkd3d_queue_timeline_trace *trace, + d3d12_fence_iface *fence, uint64_t value) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, NULL); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SIGNAL; + state->start_ts = vkd3d_get_current_time_ns(); + state->start_submit_ts = state->start_ts; + snprintf(state->desc, sizeof(state->desc), "SIGNAL %p %"PRIu64, (void*)fence, value); + return cookie; +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_wait(struct vkd3d_queue_timeline_trace *trace, + d3d12_fence_iface *fence, uint64_t value) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, NULL); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_WAIT; + state->start_ts = vkd3d_get_current_time_ns(); + state->start_submit_ts = state->start_ts; + snprintf(state->desc, sizeof(state->desc), "WAIT %p %"PRIu64, (void*)fence, value); + return cookie; +} + +static struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_generic_op(struct vkd3d_queue_timeline_trace *trace, + enum vkd3d_queue_timeline_trace_state_type type, const char *tag) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, NULL); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = type; + state->start_ts = vkd3d_get_current_time_ns(); + state->start_submit_ts = state->start_ts; + state->tid = vkd3d_get_current_thread_id(); + vkd3d_strlcpy(state->desc, sizeof(state->desc), tag); + return cookie; +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_swapchain_blit(struct vkd3d_queue_timeline_trace *trace, uint64_t present_id) +{ + char str[128]; + snprintf(str, sizeof(str), "PRESENT (id = %"PRIu64") (blit)", present_id); + return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLIT, str); +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_command_list(struct vkd3d_queue_timeline_trace *trace) +{ + struct vkd3d_queue_timeline_trace_cookie cookie = {0}; + struct vkd3d_queue_timeline_trace_state *state; + uint64_t submission_count; + if (!trace->active) + return cookie; + + cookie.index = vkd3d_queue_timeline_trace_allocate_index(trace, &submission_count); + if (!cookie.index) + return cookie; + + state = &trace->state[cookie.index]; + state->type = VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_LIST; + state->start_ts = vkd3d_get_current_time_ns(); + state->record_cookie = submission_count; + return cookie; +} + +void vkd3d_queue_timeline_trace_close_command_list(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + state->record_end_ts = vkd3d_get_current_time_ns(); + state->tid = vkd3d_get_current_thread_id(); + + /* Defer actual IO until fence workers are doing something. */ + pthread_mutex_lock(&trace->ready_lock); + vkd3d_array_reserve((void**)&trace->ready_command_lists, &trace->ready_command_lists_size, + trace->ready_command_lists_count + 1, sizeof(*trace->ready_command_lists)); + trace->ready_command_lists[trace->ready_command_lists_count++] = cookie.index; + pthread_mutex_unlock(&trace->ready_lock); +} + +void vkd3d_queue_timeline_trace_register_instantaneous(struct vkd3d_queue_timeline_trace *trace, + enum vkd3d_queue_timeline_trace_state_type type, uint64_t value) +{ + struct vkd3d_queue_timeline_trace_state *state; + unsigned int index; + + if (!trace->active) + return; + + index = vkd3d_queue_timeline_trace_allocate_index(trace, NULL); + if (!index) + return; + + state = &trace->state[index]; + state->type = type; + state->start_ts = vkd3d_get_current_time_ns(); + state->tid = vkd3d_get_current_thread_id(); + state->record_cookie = value; + + /* Defer actual IO until fence workers are doing something. */ + pthread_mutex_lock(&trace->ready_lock); + vkd3d_array_reserve((void**)&trace->ready_command_lists, &trace->ready_command_lists_size, + trace->ready_command_lists_count + 1, sizeof(*trace->ready_command_lists)); + trace->ready_command_lists[trace->ready_command_lists_count++] = index; + pthread_mutex_unlock(&trace->ready_lock); +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_present_wait(struct vkd3d_queue_timeline_trace *trace, uint64_t present_id) +{ + char str[128]; + snprintf(str, sizeof(str), "WAIT (id = %"PRIu64")", present_id); + return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT, str); +} + +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_present_block(struct vkd3d_queue_timeline_trace *trace, uint64_t present_id) +{ + char str[128]; + snprintf(str, sizeof(str), "PRESENT (id = %"PRIu64")", present_id); + return vkd3d_queue_timeline_trace_register_generic_op(trace, VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLOCK, str); +} + +static void vkd3d_queue_timeline_trace_flush_instantaneous(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_fence_worker *worker) +{ + const struct vkd3d_queue_timeline_trace_state *list_state; + size_t list_count; + size_t i; + + pthread_mutex_lock(&trace->ready_lock); + if (trace->ready_command_lists_count) + { + /* Copy to local buffer to not stall recording threads while doing IO. */ + vkd3d_array_reserve((void**)&worker->timeline.list_buffer, + &worker->timeline.list_buffer_size, + trace->ready_command_lists_count, sizeof(*worker->timeline.list_buffer)); + memcpy(worker->timeline.list_buffer, + trace->ready_command_lists, trace->ready_command_lists_count * sizeof(*worker->timeline.list_buffer)); + list_count = trace->ready_command_lists_count; + trace->ready_command_lists_count = 0; + pthread_mutex_unlock(&trace->ready_lock); + + for (i = 0; i < list_count; i++) + { + const char *generic_pid = NULL; + double start_ts; + double end_ts; + + list_state = &trace->state[worker->timeline.list_buffer[i]]; + start_ts = (double)(list_state->start_ts - trace->base_ts) * 1e-3; + + switch (list_state->type) + { + case VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_LIST: + { + end_ts = (double)(list_state->record_end_ts - trace->base_ts) * 1e-3; + fprintf(trace->file, + "{ \"name\": \"%"PRIu64 " (delay %.3f us)\", \"ph\": \"i\", \"tid\": \"0x%04x\", \"pid\": \"cmd reset\", \"ts\": %f },\n", + list_state->record_cookie, end_ts - start_ts, list_state->tid, start_ts); + fprintf(trace->file, + "{ \"name\": \"%"PRIu64" (delay %.3f us)\", \"ph\": \"i\", \"tid\": \"0x%04x\", \"pid\": \"cmd close\", \"ts\": %f },\n", + list_state->record_cookie, end_ts - start_ts, list_state->tid, end_ts); + break; + } + + case VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_QUEUE_PRESENT: + generic_pid = "queue present"; + break; + + case VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_HEAP_ALLOCATION: + generic_pid = "heap allocate"; + break; + + case VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_ALLOCATOR_RESET: + generic_pid = "command allocator reset"; + break; + + case VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMITTED_RESOURCE_ALLOCATION: + generic_pid = "committed resource alloc"; + break; + + default: + break; + } + + if (generic_pid) + { + fprintf(trace->file, + "{ \"name\": \"%"PRIu64"\", \"ph\": \"i\", \"tid\": \"0x%04x\", \"pid\": \"%s\", \"ts\": %f },\n", + list_state->record_cookie, list_state->tid, generic_pid, start_ts); + } + } + + vkd3d_queue_timeline_trace_free_indices(trace, worker->timeline.list_buffer, list_count); + } + else + pthread_mutex_unlock(&trace->ready_lock); +} + +void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_fence_worker *worker, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + const struct vkd3d_queue_timeline_trace_state *state; + double end_ts, start_submit_ts, start_ts; + unsigned int pid; + const char *tid; + double *ts_lock; + + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + start_ts = (double)(state->start_ts - trace->base_ts) * 1e-3; + start_submit_ts = (double)(state->start_submit_ts - trace->base_ts) * 1e-3; + end_ts = (double)(vkd3d_get_current_time_ns() - trace->base_ts) * 1e-3; + + if (worker) + { + if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION) + vkd3d_queue_timeline_trace_flush_instantaneous(trace, worker); + + tid = worker->timeline.tid; + pid = worker->queue->submission_thread_tid; + + if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION) + { + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"i\", \"tid\": \"cpu\", \"pid\": \"0x%04x\", \"ts\": %f, \"s\": \"t\" },\n", + state->desc, pid, start_ts); + + if (start_ts < worker->timeline.lock_end_cpu_ts) + start_ts = worker->timeline.lock_end_cpu_ts; + if (start_submit_ts < start_ts) + start_submit_ts = start_ts; + } + + ts_lock = &worker->timeline.lock_end_gpu_ts; + + if (start_submit_ts < *ts_lock) + start_submit_ts = *ts_lock; + if (end_ts < start_submit_ts) + end_ts = start_submit_ts; + *ts_lock = end_ts; + + fprintf(trace->file, "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"%s\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", + state->desc, tid, pid, start_submit_ts, end_ts - start_submit_ts); + + if (state->type == VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION) + { + worker->timeline.lock_end_cpu_ts = start_submit_ts; + fprintf(trace->file, + "{ \"name\": \"%s\", \"ph\": \"X\", \"tid\": \"submit\", \"pid\": \"0x%04x\", \"ts\": %f, \"dur\": %f },\n", + state->desc, pid, start_ts, start_submit_ts - start_ts); + } + } + + vkd3d_queue_timeline_trace_free_index(trace, cookie.index); +} + +void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie) +{ + struct vkd3d_queue_timeline_trace_state *state; + if (!trace->active || cookie.index == 0) + return; + + state = &trace->state[cookie.index]; + state->start_submit_ts = vkd3d_get_current_time_ns(); +} diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 88a8187bec..2132b894e5 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -3451,6 +3451,9 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12 object->mem.chunk == NULL /* not suballocated */ && (device->memory_properties.memoryTypes[object->mem.device_allocation.vk_memory_type].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + vkd3d_queue_timeline_trace_register_instantaneous(&device->queue_timeline_trace, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMITTED_RESOURCE_ALLOCATION, object->res.cookie); + *resource = object; return S_OK; diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c index e2d4ebbf2f..d00302d17a 100644 --- a/libs/vkd3d/swapchain.c +++ b/libs/vkd3d/swapchain.c @@ -726,6 +726,7 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if { struct dxgi_vk_swap_chain *chain = impl_from_IDXGIVkSwapChain(iface); struct dxgi_vk_swap_chain_present_request *request; + struct vkd3d_queue_timeline_trace_cookie cookie; TRACE("iface %p, SyncInterval %u, PresentFlags #%x, pPresentParameters %p.\n", iface, SyncInterval, PresentFlags, pPresentParameters); (void)pPresentParameters; @@ -769,6 +770,9 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if chain->user.index = (chain->user.index + 1) % chain->desc.BufferCount; + cookie = vkd3d_queue_timeline_trace_register_present_block( + &chain->queue->device->queue_timeline_trace, chain->user.blit_count); + /* Relevant if application does not use latency fence, or we force a lower latency through VKD3D_SWAPCHAIN_FRAME_LATENCY overrides. */ if (vkd3d_native_sync_handle_is_valid(chain->frame_latency_event_internal)) vkd3d_native_sync_handle_acquire(chain->frame_latency_event_internal); @@ -798,6 +802,9 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if if (chain->debug_latency) chain->user.begin_frame_time_ns = vkd3d_get_current_time_ns(); + vkd3d_queue_timeline_trace_complete_present_block( + &chain->queue->device->queue_timeline_trace, cookie); + return S_OK; } @@ -1431,6 +1438,7 @@ static bool request_needs_swapchain_recreation(const struct dxgi_vk_swap_chain_p static void dxgi_vk_swap_chain_present_signal_blit_semaphore(struct dxgi_vk_swap_chain *chain) { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + struct vkd3d_queue_timeline_trace_cookie cookie; VkSemaphoreSubmitInfo signal_semaphore_info; VkSubmitInfo2 submit_info; VkQueue vk_queue; @@ -1453,6 +1461,16 @@ static void dxgi_vk_swap_chain_present_signal_blit_semaphore(struct dxgi_vk_swap vr = VK_CALL(vkQueueSubmit2(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); vkd3d_queue_release(chain->queue->vkd3d_queue); + /* Mark frame boundary. */ + cookie = vkd3d_queue_timeline_trace_register_swapchain_blit( + &chain->queue->device->queue_timeline_trace, chain->present.blit_count); + + if (vkd3d_queue_timeline_trace_cookie_is_valid(cookie)) + { + vkd3d_enqueue_timeline_semaphore(&chain->queue->fence_worker, NULL, chain->present.vk_blit_semaphore, + chain->present.blit_count, false, NULL, 0, &cookie); + } + if (vr) { ERR("Failed to submit present discard, vr = %d.\n", vr); @@ -1823,6 +1841,10 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai if (present_info.pNext && vr >= 0) chain->present.present_id_valid = true; + vkd3d_queue_timeline_trace_register_instantaneous(&chain->queue->device->queue_timeline_trace, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_QUEUE_PRESENT, + chain->present.present_id_valid ? chain->present.present_id : 0); + /* Handle any errors and retry as needed. If we cannot make meaningful forward progress, just give up and retry later. */ if (vr == VK_SUBOPTIMAL_KHR || vr < 0) chain->present.force_swapchain_recreation = true; @@ -1955,7 +1977,9 @@ static void *dxgi_vk_swap_chain_wait_worker(void *chain_) { struct dxgi_vk_swap_chain *chain = chain_; + struct vkd3d_queue_timeline_trace *timeline_trace = &chain->queue->device->queue_timeline_trace; const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + struct vkd3d_queue_timeline_trace_cookie cookie; uint64_t begin_frame_time_ns = 0; uint64_t end_frame_time_ns = 0; uint64_t next_wait_id = 0; @@ -1976,10 +2000,14 @@ static void *dxgi_vk_swap_chain_wait_worker(void *chain_) if (!next_wait_id) break; + cookie = vkd3d_queue_timeline_trace_register_present_wait(timeline_trace, next_wait_id); + /* We don't really care if we observed OUT_OF_DATE or something here. */ VK_CALL(vkWaitForPresentKHR(chain->queue->device->vk_device, chain->present.vk_swapchain, next_wait_id, UINT64_MAX)); + vkd3d_queue_timeline_trace_complete_present_wait(timeline_trace, cookie); + if (begin_frame_time_ns) end_frame_time_ns = vkd3d_get_current_time_ns(); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1dc7069d35..8ea1a4b3d6 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -203,6 +203,11 @@ struct vkd3d_instance extern uint64_t vkd3d_config_flags; extern struct vkd3d_shader_quirk_info vkd3d_shader_quirk_info; +struct vkd3d_queue_timeline_trace_cookie +{ + unsigned int index; +}; + struct vkd3d_waiting_fence { d3d12_fence_iface *fence; @@ -210,6 +215,7 @@ struct vkd3d_waiting_fence uint64_t value; struct d3d12_command_allocator **command_allocators; size_t num_command_allocators; + struct vkd3d_queue_timeline_trace_cookie timeline_cookie; bool signal; }; @@ -225,12 +231,29 @@ struct vkd3d_fence_worker size_t enqueued_fences_size; struct d3d12_device *device; -}; + struct d3d12_command_queue *queue; -HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, - struct d3d12_device *device); -HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, - struct d3d12_device *device); + /* To aid timeline profiles. A single fence worker processes work monotonically. */ + struct + { + char tid[64]; + /* The lock timestamps is to ensure that the timeline trace becomes readable in chrome://tracing. + * For us, start and end ranges can overlap. This ends up as an unreadable trace + * since the tracer expects a stack-like nesting for overlapping events. + * To work around this, we ensure that start TS of a following event is moved to end TS of previous event. */ + double lock_end_gpu_ts; + double lock_end_cpu_ts; + double lock_end_event_ts; + double lock_end_present_wait_ts; + unsigned int *list_buffer; + size_t list_buffer_size; + } timeline; +}; + +HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, + d3d12_fence_iface *fence, VkSemaphore timeline, uint64_t value, bool signal, + struct d3d12_command_allocator **command_allocators, size_t num_command_allocators, + const struct vkd3d_queue_timeline_trace_cookie *timeline_cookie); /* 2 MiB is a good threshold, because it's huge page size. */ #define VKD3D_VA_BLOCK_SIZE_BITS (21) @@ -500,6 +523,7 @@ struct vkd3d_waiting_event vkd3d_native_sync_handle handle; bool *latch; uint32_t *payload; + struct vkd3d_queue_timeline_trace_cookie timeline_cookie; }; struct d3d12_fence @@ -2842,6 +2866,7 @@ struct d3d12_command_list struct d3d12_transfer_batch_state transfer_batch; struct d3d12_wbi_batch_state wbi_batch; struct d3d12_rtas_batch_state rtas_batch; + struct vkd3d_queue_timeline_trace_cookie timeline_cookie; struct vkd3d_private_store private_store; @@ -2948,6 +2973,8 @@ struct vkd3d_queue VkSemaphoreSubmitInfo *wait_semaphores; size_t wait_semaphores_size; + uint64_t *wait_values_virtual; + size_t wait_values_virtual_size; d3d12_fence_iface **wait_fences; size_t wait_fences_size; uint32_t wait_count; @@ -2958,7 +2985,8 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue); -void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, VkSemaphore semaphore, uint64_t value); +void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, + VkSemaphore semaphore, uint64_t value, uint64_t virtual_value); enum vkd3d_submission_type { @@ -3021,6 +3049,8 @@ struct d3d12_command_queue_submission_execute size_t breadcrumb_indices_count; #endif + struct vkd3d_queue_timeline_trace_cookie timeline_cookie; + bool debug_capture; bool split_submission; }; @@ -3093,6 +3123,7 @@ struct d3d12_command_queue struct vkd3d_fence_worker fence_worker; struct vkd3d_private_store private_store; struct dxgi_vk_swap_chain_factory vk_swap_chain_factory; + unsigned int submission_thread_tid; }; HRESULT d3d12_command_queue_create(struct d3d12_device *device, @@ -4286,6 +4317,114 @@ struct d3d12_device_scratch_pool unsigned int high_water_mark; }; +enum vkd3d_queue_timeline_trace_state_type +{ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_NONE = 0, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_EVENT, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SUBMISSION, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_WAIT, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_SIGNAL, + + /* Blit task */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLIT, + + /* Waiting for present wait to complete. */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_WAIT, + + /* Time spent blocking in ::Present() in user thread. */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_PRESENT_BLOCK, + + /* Reset() and Close() are useful instant events to see when command recording is happening and + * which threads do so. */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_LIST, + + /* Misc instantaneous events that are expected to be heavy. */ + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_QUEUE_PRESENT, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMITTED_RESOURCE_ALLOCATION, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_HEAP_ALLOCATION, + VKD3D_QUEUE_TIMELINE_TRACE_STATE_TYPE_COMMAND_ALLOCATOR_RESET, +}; + +struct vkd3d_queue_timeline_trace_state +{ + enum vkd3d_queue_timeline_trace_state_type type; + unsigned int tid; + uint64_t start_ts; + uint64_t start_submit_ts; + uint64_t record_end_ts; + uint64_t record_cookie; + char desc[128 - 5 * sizeof(uint64_t)]; +}; + +struct vkd3d_queue_timeline_trace +{ + pthread_mutex_t lock; + pthread_mutex_t ready_lock; + FILE *file; + bool active; + + unsigned int *vacant_indices; + size_t vacant_indices_count; + size_t vacant_indices_size; + + unsigned int *ready_command_lists; + size_t ready_command_lists_count; + size_t ready_command_lists_size; + + struct vkd3d_queue_timeline_trace_state *state; + uint64_t base_ts; + uint64_t submit_count; +}; + +static inline bool vkd3d_queue_timeline_trace_cookie_is_valid(struct vkd3d_queue_timeline_trace_cookie cookie) +{ + return cookie.index != 0; +} + +HRESULT vkd3d_queue_timeline_trace_init(struct vkd3d_queue_timeline_trace *trace); +void vkd3d_queue_timeline_trace_cleanup(struct vkd3d_queue_timeline_trace *trace); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_event_signal(struct vkd3d_queue_timeline_trace *trace, + vkd3d_native_sync_handle handle, d3d12_fence_iface *fence, uint64_t value); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_signal(struct vkd3d_queue_timeline_trace *trace, + d3d12_fence_iface *fence, uint64_t value); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_wait(struct vkd3d_queue_timeline_trace *trace, + d3d12_fence_iface *fence, uint64_t value); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_swapchain_blit(struct vkd3d_queue_timeline_trace *trace, + uint64_t present_id); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_present_wait(struct vkd3d_queue_timeline_trace *trace, + uint64_t present_id); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_present_block(struct vkd3d_queue_timeline_trace *trace, + uint64_t present_id); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_execute(struct vkd3d_queue_timeline_trace *trace, + ID3D12CommandList * const *command_lists, unsigned int count); +struct vkd3d_queue_timeline_trace_cookie +vkd3d_queue_timeline_trace_register_command_list(struct vkd3d_queue_timeline_trace *trace); + +void vkd3d_queue_timeline_trace_register_instantaneous(struct vkd3d_queue_timeline_trace *trace, + enum vkd3d_queue_timeline_trace_state_type type, uint64_t value); + +void vkd3d_queue_timeline_trace_complete_event_signal(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_fence_worker *worker, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_complete_execute(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_fence_worker *worker, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_complete_present_wait(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_complete_present_block(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_close_command_list(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); +void vkd3d_queue_timeline_trace_begin_execute(struct vkd3d_queue_timeline_trace *trace, + struct vkd3d_queue_timeline_trace_cookie cookie); + struct d3d12_device { d3d12_device_iface ID3D12Device_iface; @@ -4342,6 +4481,7 @@ struct d3d12_device unsigned int format_compatibility_list_count; const struct vkd3d_format_compatibility_list *format_compatibility_lists; struct vkd3d_bindless_state bindless_state; + struct vkd3d_queue_timeline_trace queue_timeline_trace; struct vkd3d_memory_info memory_info; struct vkd3d_meta_ops meta_ops; struct vkd3d_view_map sampler_map;