better gameboy timings

This commit is contained in:
2025-10-09 18:14:17 +02:00
parent 4e78618556
commit 8520ef556b
5 changed files with 251 additions and 44 deletions

View File

@@ -51,10 +51,10 @@ extern "C" void s_spi_post_cb(spi_transaction_t* trans);
static inline spi_device_interface_config_t _devcfg = { static inline spi_device_interface_config_t _devcfg = {
.mode = 0, // SPI mode 0 .mode = 0, // SPI mode 0
.clock_speed_hz = 6 * 1000 * 1000, // Clock out at 10 MHz .clock_speed_hz = 10 * 1000 * 1000, // Clock out at 10 MHz
.spics_io_num = SPI_DISP_CS, // CS pin .spics_io_num = SPI_DISP_CS, // CS pin
.flags = SPI_DEVICE_POSITIVE_CS, .flags = SPI_DEVICE_POSITIVE_CS,
.queue_size = 3, .queue_size = 1,
.pre_cb = nullptr, .pre_cb = nullptr,
.post_cb = s_spi_post_cb, .post_cb = s_spi_post_cb,
}; };

View File

@@ -20,7 +20,7 @@ private:
.sclk_io_num = SPI_SCK, .sclk_io_num = SPI_SCK,
.quadwp_io_num = -1, .quadwp_io_num = -1,
.quadhd_io_num = -1, .quadhd_io_num = -1,
.max_transfer_sz = 400 * 240 * 2}; .max_transfer_sz = 12482U};
}; // namespace spi_global }; // namespace spi_global

View File

@@ -20,6 +20,7 @@
#include <shutdowner.hpp> #include <shutdowner.hpp>
#include <spi_global.hpp> #include <spi_global.hpp>
#include "freertos/FreeRTOS.h" #include "freertos/FreeRTOS.h"
#include "freertos/task.h" #include "freertos/task.h"
@@ -29,6 +30,160 @@
#include "esp_sleep.h" #include "esp_sleep.h"
#include "sdkconfig.h" #include "sdkconfig.h"
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#if CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS && CONFIG_FREERTOS_USE_TRACE_FACILITY
namespace {
constexpr TickType_t kStatsTaskDelayTicks = pdMS_TO_TICKS(5000);
constexpr TickType_t kStatsWarmupDelay = pdMS_TO_TICKS(2000);
constexpr UBaseType_t kStatsTaskPriority = tskIDLE_PRIORITY + 1;
constexpr uint32_t kStatsTaskStack = 4096;
constexpr char kStatsTaskName[] = "TaskStats";
struct TaskRuntimeSample {
TaskHandle_t handle;
uint32_t runtime;
};
struct TaskUsageRow {
std::string name;
uint64_t delta;
UBaseType_t priority;
uint32_t stackHighWaterBytes;
bool isIdle;
};
[[nodiscard]] uint64_t deltaWithWrap(uint32_t current, uint32_t previous) {
if (current >= previous)
return static_cast<uint64_t>(current - previous);
return static_cast<uint64_t>(current) + (static_cast<uint64_t>(UINT32_MAX) - previous) + 1ULL;
}
void task_usage_monitor(void*) {
static constexpr char tag[] = "TaskUsage";
std::vector<TaskRuntimeSample> lastSamples;
uint32_t lastTotal = 0;
vTaskDelay(kStatsWarmupDelay);
while (true) {
vTaskDelay(kStatsTaskDelayTicks);
const UBaseType_t taskCount = uxTaskGetNumberOfTasks();
if (taskCount == 0)
continue;
std::vector<TaskStatus_t> statusBuffer(taskCount);
uint32_t totalRuntime = 0;
const UBaseType_t captured = uxTaskGetSystemState(statusBuffer.data(), statusBuffer.size(), &totalRuntime);
if (captured == 0)
continue;
statusBuffer.resize(captured);
std::vector<TaskRuntimeSample> currentSamples;
currentSamples.reserve(statusBuffer.size());
if (lastTotal == 0) {
for (const auto& status: statusBuffer) {
currentSamples.push_back({status.xHandle, status.ulRunTimeCounter});
}
lastSamples = std::move(currentSamples);
lastTotal = totalRuntime;
continue;
}
const uint64_t totalDelta = deltaWithWrap(totalRuntime, lastTotal);
if (totalDelta == 0)
continue;
std::vector<TaskUsageRow> rows;
rows.reserve(statusBuffer.size());
uint64_t idleDelta = 0;
uint64_t activeDelta = 0;
uint64_t accountedDelta = 0;
for (const auto& status: statusBuffer) {
const auto it = std::find_if(lastSamples.begin(), lastSamples.end(), [&](const TaskRuntimeSample& entry) {
return entry.handle == status.xHandle;
});
const uint32_t previousRuntime = (it != lastSamples.end()) ? it->runtime : status.ulRunTimeCounter;
const uint64_t taskDelta = (it != lastSamples.end()) ? deltaWithWrap(status.ulRunTimeCounter, previousRuntime) : 0ULL;
currentSamples.push_back({status.xHandle, status.ulRunTimeCounter});
TaskUsageRow row{
.name = std::string(status.pcTaskName ? status.pcTaskName : ""),
.delta = taskDelta,
.priority = status.uxCurrentPriority,
.stackHighWaterBytes = static_cast<uint32_t>(status.usStackHighWaterMark) * sizeof(StackType_t),
.isIdle = status.uxCurrentPriority == tskIDLE_PRIORITY ||
(status.pcTaskName && std::strncmp(status.pcTaskName, "IDLE", 4) == 0)
};
rows.push_back(std::move(row));
accountedDelta += taskDelta;
if (rows.back().isIdle)
idleDelta += taskDelta;
else
activeDelta += taskDelta;
}
lastSamples = std::move(currentSamples);
lastTotal = totalRuntime;
if (rows.empty())
continue;
std::sort(rows.begin(), rows.end(), [](const TaskUsageRow& a, const TaskUsageRow& b) {
return a.delta > b.delta;
});
const double windowMs = static_cast<double>(totalDelta) / 1000.0;
std::printf("\n[%s] CPU usage over %.1f ms window\n", tag, windowMs);
for (const auto& row: rows) {
if (row.delta == 0 || row.isIdle)
continue;
const double pct = (static_cast<double>(row.delta) * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (prio=%u stack_free=%lu B)\n", row.name.c_str(), pct, row.priority,
static_cast<unsigned long>(row.stackHighWaterBytes));
}
const double idlePct = (idleDelta * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (aggregated idle)\n", "<idle>", idlePct);
const uint64_t residual = (accountedDelta >= totalDelta) ? 0ULL : (totalDelta - accountedDelta);
if (residual > 0) {
const double residualPct = (static_cast<double>(residual) * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (ISRs / scheduler)\n", "<isr>", residualPct);
}
std::printf("[%s] Active %.2f%% | Idle %.2f%%\n", tag,
(activeDelta * 100.0) / static_cast<double>(totalDelta), idlePct);
std::fflush(stdout);
}
}
void start_task_usage_monitor() {
xTaskCreatePinnedToCore(task_usage_monitor, kStatsTaskName, kStatsTaskStack, nullptr, kStatsTaskPriority, nullptr, 0);
}
} // namespace
#else
inline void start_task_usage_monitor() {}
#endif
extern "C" void app_main() { extern "C" void app_main() {
#ifdef CONFIG_PM_ENABLE #ifdef CONFIG_PM_ENABLE
// const esp_pm_config_t pm_config = { // const esp_pm_config_t pm_config = {
@@ -71,5 +226,7 @@ extern "C" void app_main() {
system.registerApp(apps::createTetrisAppFactory()); system.registerApp(apps::createTetrisAppFactory());
system.registerApp(apps::createGameboyAppFactory()); system.registerApp(apps::createGameboyAppFactory());
start_task_usage_monitor();
system.run(); system.run();
} }

View File

@@ -27,6 +27,18 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <vector> #include <vector>
#define GAMEBOY_PERF_METRICS 0
#ifndef GAMEBOY_PERF_METRICS
#define GAMEBOY_PERF_METRICS 1
#endif
#if GAMEBOY_PERF_METRICS
#define GB_PERF_ONLY(...) __VA_ARGS__
#else
#define GB_PERF_ONLY(...)
#endif
namespace apps { namespace apps {
namespace { namespace {
@@ -161,7 +173,8 @@ public:
void onStart() override { void onStart() override {
cancelTick(); cancelTick();
perf.resetAll(); frameDelayCarryUs = 0;
GB_PERF_ONLY(perf.resetAll();)
prevInput = {}; prevInput = {};
statusMessage.clear(); statusMessage.clear();
resetFpsStats(); resetFpsStats();
@@ -176,46 +189,52 @@ public:
void onStop() override { void onStop() override {
cancelTick(); cancelTick();
perf.maybePrintAggregate(true); frameDelayCarryUs = 0;
GB_PERF_ONLY(perf.maybePrintAggregate(true);)
unloadRom(); unloadRom();
} }
void handleEvent(const AppEvent& event) override { void handleEvent(const AppEvent& event) override {
if (event.type == AppEventType::Timer && event.timer.handle == tickTimer) { if (event.type == AppEventType::Timer && event.timer.handle == tickTimer) {
tickTimer = kInvalidAppTimer; tickTimer = kInvalidAppTimer;
const uint64_t frameStartUs = esp_timer_get_time();
performStep(); performStep();
scheduleNextTick(nextDelayMs()); const uint64_t frameEndUs = esp_timer_get_time();
const uint64_t elapsedUs = (frameEndUs >= frameStartUs) ? (frameEndUs - frameStartUs) : 0;
printf("Step took %" PRIu64 " us\n", elapsedUs);
scheduleAfterFrame(elapsedUs);
return; return;
} }
if (event.type == AppEventType::Button) { if (event.type == AppEventType::Button) {
frameDelayCarryUs = 0;
scheduleNextTick(0); scheduleNextTick(0);
} }
} }
void performStep() { void performStep() {
perf.resetForStep(); GB_PERF_ONLY(perf.resetForStep();)
const uint64_t inputStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t inputStartUs = esp_timer_get_time();)
const InputState input = context.input.readState(); const InputState input = context.input.readState();
perf.inputUs = esp_timer_get_time() - inputStartUs; GB_PERF_ONLY(perf.inputUs = esp_timer_get_time() - inputStartUs;)
const Mode stepMode = mode; const Mode stepMode = mode;
switch (stepMode) { switch (stepMode) {
case Mode::Browse: { case Mode::Browse: {
const uint64_t handleStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();)
handleBrowserInput(input); handleBrowserInput(input);
perf.handleUs = esp_timer_get_time() - handleStartUs; GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;)
const uint64_t renderStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();)
renderBrowser(); renderBrowser();
perf.renderUs = esp_timer_get_time() - renderStartUs; GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;)
break; break;
} }
case Mode::Running: { case Mode::Running: {
const uint64_t handleStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();)
handleGameInput(input); handleGameInput(input);
perf.handleUs = esp_timer_get_time() - handleStartUs; GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;)
if (!gbReady) { if (!gbReady) {
mode = Mode::Browse; mode = Mode::Browse;
@@ -223,32 +242,28 @@ public:
break; break;
} }
const uint64_t geometryStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t geometryStartUs = esp_timer_get_time();)
ensureRenderGeometry(); ensureRenderGeometry();
perf.geometryUs = esp_timer_get_time() - geometryStartUs; GB_PERF_ONLY(perf.geometryUs = esp_timer_get_time() - geometryStartUs;)
const uint64_t waitStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t runStartUs = esp_timer_get_time();)
DispTools::draw_to_display_async_wait();
perf.waitUs = esp_timer_get_time() - waitStartUs;
const uint64_t runStartUs = esp_timer_get_time();
gb_run_frame(&gb); gb_run_frame(&gb);
perf.runUs = esp_timer_get_time() - runStartUs; GB_PERF_ONLY(perf.runUs = esp_timer_get_time() - runStartUs;)
const uint64_t renderStartUs = esp_timer_get_time(); GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();)
renderGameFrame(); renderGameFrame();
perf.renderUs = esp_timer_get_time() - renderStartUs; GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;)
break; break;
} }
} }
prevInput = input; prevInput = input;
perf.finishStep(); GB_PERF_ONLY(perf.finishStep();)
perf.accumulate(); GB_PERF_ONLY(perf.accumulate();)
perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex, GB_PERF_ONLY(perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex,
browserDirty); browserDirty);)
perf.maybePrintAggregate(); GB_PERF_ONLY(perf.maybePrintAggregate();)
} }
private: private:
@@ -458,22 +473,34 @@ private:
class ScopedCallbackTimer { class ScopedCallbackTimer {
public: public:
ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) : app(instance), kind(kind) { ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) {
if (app) #if GAMEBOY_PERF_METRICS
startUs = esp_timer_get_time(); if (instance) {
app = instance;
cbKind = kind;
startUs = esp_timer_get_time();
}
#else
(void)instance;
(void)kind;
#endif
} }
~ScopedCallbackTimer() { ~ScopedCallbackTimer() {
#if GAMEBOY_PERF_METRICS
if (!app) if (!app)
return; return;
const uint64_t end = esp_timer_get_time(); const uint64_t end = esp_timer_get_time();
app->perf.addCallback(kind, end - startUs); app->perf.addCallback(cbKind, end - startUs);
#endif
} }
private: private:
GameboyApp* app; #if GAMEBOY_PERF_METRICS
PerfTracker::CallbackKind kind; GameboyApp* app = nullptr;
PerfTracker::CallbackKind cbKind{};
uint64_t startUs = 0; uint64_t startUs = 0;
#endif
}; };
struct RenderGeometry { struct RenderGeometry {
@@ -491,10 +518,12 @@ private:
std::array<int, LCD_WIDTH> colXEnd{}; std::array<int, LCD_WIDTH> colXEnd{};
}; };
AppContext& context; AppContext& context;
Framebuffer& framebuffer; Framebuffer& framebuffer;
PerfTracker perf{}; PerfTracker perf{};
AppTimerHandle tickTimer = kInvalidAppTimer; AppTimerHandle tickTimer = kInvalidAppTimer;
int64_t frameDelayCarryUs = 0;
static constexpr uint32_t kTargetFrameUs = 1000000 / 60; // ~16.6 ms
Mode mode = Mode::Browse; Mode mode = Mode::Browse;
ScaleMode scaleMode = ScaleMode::Original; ScaleMode scaleMode = ScaleMode::Original;
@@ -532,10 +561,25 @@ private:
tickTimer = context.scheduleTimer(delayMs, false); tickTimer = context.scheduleTimer(delayMs, false);
} }
uint32_t nextDelayMs() const { uint32_t idleDelayMs() const { return browserDirty ? 50 : 140; }
if (mode == Mode::Running)
return 0; void scheduleAfterFrame(uint64_t elapsedUs) {
return browserDirty ? 50 : 140; if (mode == Mode::Running && gbReady) {
int64_t desiredUs = static_cast<int64_t>(kTargetFrameUs) - static_cast<int64_t>(elapsedUs);
desiredUs += frameDelayCarryUs;
if (desiredUs <= 0) {
frameDelayCarryUs = desiredUs;
scheduleNextTick(0);
return;
}
frameDelayCarryUs = desiredUs % 1000;
desiredUs -= frameDelayCarryUs;
uint32_t delayMs = static_cast<uint32_t>(desiredUs / 1000);
scheduleNextTick(delayMs);
return;
}
frameDelayCarryUs = 0;
scheduleNextTick(idleDelayMs());
} }
bool ensureFilesystemReady() { bool ensureFilesystemReady() {
@@ -1216,6 +1260,10 @@ private:
Framebuffer& fb = self->framebuffer; Framebuffer& fb = self->framebuffer;
GB_PERF_ONLY(const uint64_t waitStartUs = esp_timer_get_time();)
DispTools::draw_to_display_async_wait();
GB_PERF_ONLY(perf.waitUs = esp_timer_get_time() - waitStartUs;)
if (geom.scaledWidth == LCD_WIDTH && geom.scaledHeight == LCD_HEIGHT) { if (geom.scaledWidth == LCD_WIDTH && geom.scaledHeight == LCD_HEIGHT) {
const int dstY = yStart; const int dstY = yStart;
const int dstXBase = geom.offsetX; const int dstXBase = geom.offsetX;

View File

@@ -51,7 +51,6 @@ extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* /*t*/) {
static void clear_task(void*) { static void clear_task(void*) {
for (;;) { for (;;) {
if (xSemaphoreTake(s_clearReqSem, portMAX_DELAY) == pdTRUE) { if (xSemaphoreTake(s_clearReqSem, portMAX_DELAY) == pdTRUE) {
printf("Started zeroing\n");
spi_transaction_t* r = nullptr; spi_transaction_t* r = nullptr;
ESP_ERROR_CHECK(spi_device_get_trans_result(SMD::_spi, &r, 0)); ESP_ERROR_CHECK(spi_device_get_trans_result(SMD::_spi, &r, 0));
zero_framebuffer_payload(); zero_framebuffer_payload();
@@ -111,11 +110,14 @@ void SMD::async_draw_wait() {
// assert((uxSemaphoreGetCount(s_clearSem) == 0) == _inFlight); // assert((uxSemaphoreGetCount(s_clearSem) == 0) == _inFlight);
return; return;
} }
auto now = esp_timer_get_time();
if (!xSemaphoreTake(s_clearSem, portMAX_DELAY)) if (!xSemaphoreTake(s_clearSem, portMAX_DELAY))
assert(false); assert(false);
if (!xSemaphoreGive(s_clearSem)) if (!xSemaphoreGive(s_clearSem))
assert(false); assert(false);
assert(!_inFlight); assert(!_inFlight);
auto elapsed = esp_timer_get_time() - now;
printf("Waited %" PRIu64 " us\n", elapsed);
} }
// (clear_in_progress / wait_clear / request_clear removed from public API) // (clear_in_progress / wait_clear / request_clear removed from public API)