From 8520ef556b3b7e4cdf3c02b3b1ae5fc3aaf94a66 Mon Sep 17 00:00:00 2001 From: Stepan Usatiuk Date: Thu, 9 Oct 2025 18:14:17 +0200 Subject: [PATCH] better gameboy timings --- Firmware/main/include/display.hpp | 4 +- Firmware/main/include/spi_global.hpp | 2 +- Firmware/main/src/app_main.cpp | 157 +++++++++++++++++++++++++ Firmware/main/src/apps/gameboy_app.cpp | 128 +++++++++++++------- Firmware/main/src/display.cpp | 4 +- 5 files changed, 251 insertions(+), 44 deletions(-) diff --git a/Firmware/main/include/display.hpp b/Firmware/main/include/display.hpp index 312f5c3..1fbecec 100644 --- a/Firmware/main/include/display.hpp +++ b/Firmware/main/include/display.hpp @@ -51,10 +51,10 @@ extern "C" void s_spi_post_cb(spi_transaction_t* trans); static inline spi_device_interface_config_t _devcfg = { .mode = 0, // SPI mode 0 - .clock_speed_hz = 6 * 1000 * 1000, // Clock out at 10 MHz + .clock_speed_hz = 10 * 1000 * 1000, // Clock out at 10 MHz .spics_io_num = SPI_DISP_CS, // CS pin .flags = SPI_DEVICE_POSITIVE_CS, - .queue_size = 3, + .queue_size = 1, .pre_cb = nullptr, .post_cb = s_spi_post_cb, }; diff --git a/Firmware/main/include/spi_global.hpp b/Firmware/main/include/spi_global.hpp index d2f40ec..1a8395c 100644 --- a/Firmware/main/include/spi_global.hpp +++ b/Firmware/main/include/spi_global.hpp @@ -20,7 +20,7 @@ private: .sclk_io_num = SPI_SCK, .quadwp_io_num = -1, .quadhd_io_num = -1, - .max_transfer_sz = 400 * 240 * 2}; + .max_transfer_sz = 12482U}; }; // namespace spi_global diff --git a/Firmware/main/src/app_main.cpp b/Firmware/main/src/app_main.cpp index 1336037..5fa3147 100644 --- a/Firmware/main/src/app_main.cpp +++ b/Firmware/main/src/app_main.cpp @@ -20,6 +20,7 @@ #include #include + #include "freertos/FreeRTOS.h" #include "freertos/task.h" @@ -29,6 +30,160 @@ #include "esp_sleep.h" #include "sdkconfig.h" +#include +#include +#include +#include +#include +#include +#include + +#if CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS && CONFIG_FREERTOS_USE_TRACE_FACILITY +namespace { + +constexpr TickType_t kStatsTaskDelayTicks = pdMS_TO_TICKS(5000); +constexpr TickType_t kStatsWarmupDelay = pdMS_TO_TICKS(2000); +constexpr UBaseType_t kStatsTaskPriority = tskIDLE_PRIORITY + 1; +constexpr uint32_t kStatsTaskStack = 4096; +constexpr char kStatsTaskName[] = "TaskStats"; + +struct TaskRuntimeSample { + TaskHandle_t handle; + uint32_t runtime; +}; + +struct TaskUsageRow { + std::string name; + uint64_t delta; + UBaseType_t priority; + uint32_t stackHighWaterBytes; + bool isIdle; +}; + +[[nodiscard]] uint64_t deltaWithWrap(uint32_t current, uint32_t previous) { + if (current >= previous) + return static_cast(current - previous); + return static_cast(current) + (static_cast(UINT32_MAX) - previous) + 1ULL; +} + +void task_usage_monitor(void*) { + static constexpr char tag[] = "TaskUsage"; + std::vector lastSamples; + uint32_t lastTotal = 0; + + vTaskDelay(kStatsWarmupDelay); + + while (true) { + vTaskDelay(kStatsTaskDelayTicks); + + const UBaseType_t taskCount = uxTaskGetNumberOfTasks(); + if (taskCount == 0) + continue; + + std::vector statusBuffer(taskCount); + uint32_t totalRuntime = 0; + const UBaseType_t captured = uxTaskGetSystemState(statusBuffer.data(), statusBuffer.size(), &totalRuntime); + if (captured == 0) + continue; + statusBuffer.resize(captured); + + std::vector currentSamples; + currentSamples.reserve(statusBuffer.size()); + + if (lastTotal == 0) { + for (const auto& status: statusBuffer) { + currentSamples.push_back({status.xHandle, status.ulRunTimeCounter}); + } + lastSamples = std::move(currentSamples); + lastTotal = totalRuntime; + continue; + } + + const uint64_t totalDelta = deltaWithWrap(totalRuntime, lastTotal); + if (totalDelta == 0) + continue; + + std::vector rows; + rows.reserve(statusBuffer.size()); + + uint64_t idleDelta = 0; + uint64_t activeDelta = 0; + uint64_t accountedDelta = 0; + + for (const auto& status: statusBuffer) { + const auto it = std::find_if(lastSamples.begin(), lastSamples.end(), [&](const TaskRuntimeSample& entry) { + return entry.handle == status.xHandle; + }); + + const uint32_t previousRuntime = (it != lastSamples.end()) ? it->runtime : status.ulRunTimeCounter; + const uint64_t taskDelta = (it != lastSamples.end()) ? deltaWithWrap(status.ulRunTimeCounter, previousRuntime) : 0ULL; + + currentSamples.push_back({status.xHandle, status.ulRunTimeCounter}); + + TaskUsageRow row{ + .name = std::string(status.pcTaskName ? status.pcTaskName : ""), + .delta = taskDelta, + .priority = status.uxCurrentPriority, + .stackHighWaterBytes = static_cast(status.usStackHighWaterMark) * sizeof(StackType_t), + .isIdle = status.uxCurrentPriority == tskIDLE_PRIORITY || + (status.pcTaskName && std::strncmp(status.pcTaskName, "IDLE", 4) == 0) + }; + + rows.push_back(std::move(row)); + + accountedDelta += taskDelta; + if (rows.back().isIdle) + idleDelta += taskDelta; + else + activeDelta += taskDelta; + } + + lastSamples = std::move(currentSamples); + lastTotal = totalRuntime; + + if (rows.empty()) + continue; + + std::sort(rows.begin(), rows.end(), [](const TaskUsageRow& a, const TaskUsageRow& b) { + return a.delta > b.delta; + }); + + const double windowMs = static_cast(totalDelta) / 1000.0; + + std::printf("\n[%s] CPU usage over %.1f ms window\n", tag, windowMs); + + for (const auto& row: rows) { + if (row.delta == 0 || row.isIdle) + continue; + const double pct = (static_cast(row.delta) * 100.0) / static_cast(totalDelta); + std::printf(" %-16s %6.2f%% (prio=%u stack_free=%lu B)\n", row.name.c_str(), pct, row.priority, + static_cast(row.stackHighWaterBytes)); + } + + const double idlePct = (idleDelta * 100.0) / static_cast(totalDelta); + std::printf(" %-16s %6.2f%% (aggregated idle)\n", "", idlePct); + + const uint64_t residual = (accountedDelta >= totalDelta) ? 0ULL : (totalDelta - accountedDelta); + if (residual > 0) { + const double residualPct = (static_cast(residual) * 100.0) / static_cast(totalDelta); + std::printf(" %-16s %6.2f%% (ISRs / scheduler)\n", "", residualPct); + } + + std::printf("[%s] Active %.2f%% | Idle %.2f%%\n", tag, + (activeDelta * 100.0) / static_cast(totalDelta), idlePct); + std::fflush(stdout); + } +} + +void start_task_usage_monitor() { + xTaskCreatePinnedToCore(task_usage_monitor, kStatsTaskName, kStatsTaskStack, nullptr, kStatsTaskPriority, nullptr, 0); +} + +} // namespace +#else +inline void start_task_usage_monitor() {} +#endif + extern "C" void app_main() { #ifdef CONFIG_PM_ENABLE // const esp_pm_config_t pm_config = { @@ -71,5 +226,7 @@ extern "C" void app_main() { system.registerApp(apps::createTetrisAppFactory()); system.registerApp(apps::createGameboyAppFactory()); + start_task_usage_monitor(); + system.run(); } diff --git a/Firmware/main/src/apps/gameboy_app.cpp b/Firmware/main/src/apps/gameboy_app.cpp index 48fd2d5..d1726e0 100644 --- a/Firmware/main/src/apps/gameboy_app.cpp +++ b/Firmware/main/src/apps/gameboy_app.cpp @@ -27,6 +27,18 @@ #include #include +#define GAMEBOY_PERF_METRICS 0 + +#ifndef GAMEBOY_PERF_METRICS +#define GAMEBOY_PERF_METRICS 1 +#endif + +#if GAMEBOY_PERF_METRICS +#define GB_PERF_ONLY(...) __VA_ARGS__ +#else +#define GB_PERF_ONLY(...) +#endif + namespace apps { namespace { @@ -161,7 +173,8 @@ public: void onStart() override { cancelTick(); - perf.resetAll(); + frameDelayCarryUs = 0; + GB_PERF_ONLY(perf.resetAll();) prevInput = {}; statusMessage.clear(); resetFpsStats(); @@ -176,46 +189,52 @@ public: void onStop() override { cancelTick(); - perf.maybePrintAggregate(true); + frameDelayCarryUs = 0; + GB_PERF_ONLY(perf.maybePrintAggregate(true);) unloadRom(); } void handleEvent(const AppEvent& event) override { if (event.type == AppEventType::Timer && event.timer.handle == tickTimer) { tickTimer = kInvalidAppTimer; + const uint64_t frameStartUs = esp_timer_get_time(); performStep(); - scheduleNextTick(nextDelayMs()); + const uint64_t frameEndUs = esp_timer_get_time(); + const uint64_t elapsedUs = (frameEndUs >= frameStartUs) ? (frameEndUs - frameStartUs) : 0; + printf("Step took %" PRIu64 " us\n", elapsedUs); + scheduleAfterFrame(elapsedUs); return; } if (event.type == AppEventType::Button) { + frameDelayCarryUs = 0; scheduleNextTick(0); } } void performStep() { - perf.resetForStep(); + GB_PERF_ONLY(perf.resetForStep();) - const uint64_t inputStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t inputStartUs = esp_timer_get_time();) const InputState input = context.input.readState(); - perf.inputUs = esp_timer_get_time() - inputStartUs; + GB_PERF_ONLY(perf.inputUs = esp_timer_get_time() - inputStartUs;) const Mode stepMode = mode; switch (stepMode) { case Mode::Browse: { - const uint64_t handleStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();) handleBrowserInput(input); - perf.handleUs = esp_timer_get_time() - handleStartUs; + GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;) - const uint64_t renderStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();) renderBrowser(); - perf.renderUs = esp_timer_get_time() - renderStartUs; + GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;) break; } case Mode::Running: { - const uint64_t handleStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();) handleGameInput(input); - perf.handleUs = esp_timer_get_time() - handleStartUs; + GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;) if (!gbReady) { mode = Mode::Browse; @@ -223,32 +242,28 @@ public: break; } - const uint64_t geometryStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t geometryStartUs = esp_timer_get_time();) ensureRenderGeometry(); - perf.geometryUs = esp_timer_get_time() - geometryStartUs; + GB_PERF_ONLY(perf.geometryUs = esp_timer_get_time() - geometryStartUs;) - const uint64_t waitStartUs = esp_timer_get_time(); - DispTools::draw_to_display_async_wait(); - perf.waitUs = esp_timer_get_time() - waitStartUs; - - const uint64_t runStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t runStartUs = esp_timer_get_time();) gb_run_frame(&gb); - perf.runUs = esp_timer_get_time() - runStartUs; + GB_PERF_ONLY(perf.runUs = esp_timer_get_time() - runStartUs;) - const uint64_t renderStartUs = esp_timer_get_time(); + GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();) renderGameFrame(); - perf.renderUs = esp_timer_get_time() - renderStartUs; + GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;) break; } } prevInput = input; - perf.finishStep(); - perf.accumulate(); - perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex, - browserDirty); - perf.maybePrintAggregate(); + GB_PERF_ONLY(perf.finishStep();) + GB_PERF_ONLY(perf.accumulate();) + GB_PERF_ONLY(perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex, + browserDirty);) + GB_PERF_ONLY(perf.maybePrintAggregate();) } private: @@ -458,22 +473,34 @@ private: class ScopedCallbackTimer { public: - ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) : app(instance), kind(kind) { - if (app) - startUs = esp_timer_get_time(); + ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) { +#if GAMEBOY_PERF_METRICS + if (instance) { + app = instance; + cbKind = kind; + startUs = esp_timer_get_time(); + } +#else + (void)instance; + (void)kind; +#endif } ~ScopedCallbackTimer() { +#if GAMEBOY_PERF_METRICS if (!app) return; const uint64_t end = esp_timer_get_time(); - app->perf.addCallback(kind, end - startUs); + app->perf.addCallback(cbKind, end - startUs); +#endif } private: - GameboyApp* app; - PerfTracker::CallbackKind kind; +#if GAMEBOY_PERF_METRICS + GameboyApp* app = nullptr; + PerfTracker::CallbackKind cbKind{}; uint64_t startUs = 0; +#endif }; struct RenderGeometry { @@ -491,10 +518,12 @@ private: std::array colXEnd{}; }; - AppContext& context; - Framebuffer& framebuffer; - PerfTracker perf{}; + AppContext& context; + Framebuffer& framebuffer; + PerfTracker perf{}; AppTimerHandle tickTimer = kInvalidAppTimer; + int64_t frameDelayCarryUs = 0; + static constexpr uint32_t kTargetFrameUs = 1000000 / 60; // ~16.6 ms Mode mode = Mode::Browse; ScaleMode scaleMode = ScaleMode::Original; @@ -532,10 +561,25 @@ private: tickTimer = context.scheduleTimer(delayMs, false); } - uint32_t nextDelayMs() const { - if (mode == Mode::Running) - return 0; - return browserDirty ? 50 : 140; + uint32_t idleDelayMs() const { return browserDirty ? 50 : 140; } + + void scheduleAfterFrame(uint64_t elapsedUs) { + if (mode == Mode::Running && gbReady) { + int64_t desiredUs = static_cast(kTargetFrameUs) - static_cast(elapsedUs); + desiredUs += frameDelayCarryUs; + if (desiredUs <= 0) { + frameDelayCarryUs = desiredUs; + scheduleNextTick(0); + return; + } + frameDelayCarryUs = desiredUs % 1000; + desiredUs -= frameDelayCarryUs; + uint32_t delayMs = static_cast(desiredUs / 1000); + scheduleNextTick(delayMs); + return; + } + frameDelayCarryUs = 0; + scheduleNextTick(idleDelayMs()); } bool ensureFilesystemReady() { @@ -1216,6 +1260,10 @@ private: Framebuffer& fb = self->framebuffer; + GB_PERF_ONLY(const uint64_t waitStartUs = esp_timer_get_time();) + DispTools::draw_to_display_async_wait(); + GB_PERF_ONLY(perf.waitUs = esp_timer_get_time() - waitStartUs;) + if (geom.scaledWidth == LCD_WIDTH && geom.scaledHeight == LCD_HEIGHT) { const int dstY = yStart; const int dstXBase = geom.offsetX; diff --git a/Firmware/main/src/display.cpp b/Firmware/main/src/display.cpp index 4958b70..b775c6f 100644 --- a/Firmware/main/src/display.cpp +++ b/Firmware/main/src/display.cpp @@ -51,7 +51,6 @@ extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* /*t*/) { static void clear_task(void*) { for (;;) { if (xSemaphoreTake(s_clearReqSem, portMAX_DELAY) == pdTRUE) { - printf("Started zeroing\n"); spi_transaction_t* r = nullptr; ESP_ERROR_CHECK(spi_device_get_trans_result(SMD::_spi, &r, 0)); zero_framebuffer_payload(); @@ -111,11 +110,14 @@ void SMD::async_draw_wait() { // assert((uxSemaphoreGetCount(s_clearSem) == 0) == _inFlight); return; } + auto now = esp_timer_get_time(); if (!xSemaphoreTake(s_clearSem, portMAX_DELAY)) assert(false); if (!xSemaphoreGive(s_clearSem)) assert(false); assert(!_inFlight); + auto elapsed = esp_timer_get_time() - now; + printf("Waited %" PRIu64 " us\n", elapsed); } // (clear_in_progress / wait_clear / request_clear removed from public API)