better gameboy timings

This commit is contained in:
2025-10-09 18:14:17 +02:00
parent 4e78618556
commit 8520ef556b
5 changed files with 251 additions and 44 deletions

View File

@@ -51,10 +51,10 @@ extern "C" void s_spi_post_cb(spi_transaction_t* trans);
static inline spi_device_interface_config_t _devcfg = {
.mode = 0, // SPI mode 0
.clock_speed_hz = 6 * 1000 * 1000, // Clock out at 10 MHz
.clock_speed_hz = 10 * 1000 * 1000, // Clock out at 10 MHz
.spics_io_num = SPI_DISP_CS, // CS pin
.flags = SPI_DEVICE_POSITIVE_CS,
.queue_size = 3,
.queue_size = 1,
.pre_cb = nullptr,
.post_cb = s_spi_post_cb,
};

View File

@@ -20,7 +20,7 @@ private:
.sclk_io_num = SPI_SCK,
.quadwp_io_num = -1,
.quadhd_io_num = -1,
.max_transfer_sz = 400 * 240 * 2};
.max_transfer_sz = 12482U};
}; // namespace spi_global

View File

@@ -20,6 +20,7 @@
#include <shutdowner.hpp>
#include <spi_global.hpp>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
@@ -29,6 +30,160 @@
#include "esp_sleep.h"
#include "sdkconfig.h"
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#if CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS && CONFIG_FREERTOS_USE_TRACE_FACILITY
namespace {
constexpr TickType_t kStatsTaskDelayTicks = pdMS_TO_TICKS(5000);
constexpr TickType_t kStatsWarmupDelay = pdMS_TO_TICKS(2000);
constexpr UBaseType_t kStatsTaskPriority = tskIDLE_PRIORITY + 1;
constexpr uint32_t kStatsTaskStack = 4096;
constexpr char kStatsTaskName[] = "TaskStats";
struct TaskRuntimeSample {
TaskHandle_t handle;
uint32_t runtime;
};
struct TaskUsageRow {
std::string name;
uint64_t delta;
UBaseType_t priority;
uint32_t stackHighWaterBytes;
bool isIdle;
};
[[nodiscard]] uint64_t deltaWithWrap(uint32_t current, uint32_t previous) {
if (current >= previous)
return static_cast<uint64_t>(current - previous);
return static_cast<uint64_t>(current) + (static_cast<uint64_t>(UINT32_MAX) - previous) + 1ULL;
}
void task_usage_monitor(void*) {
static constexpr char tag[] = "TaskUsage";
std::vector<TaskRuntimeSample> lastSamples;
uint32_t lastTotal = 0;
vTaskDelay(kStatsWarmupDelay);
while (true) {
vTaskDelay(kStatsTaskDelayTicks);
const UBaseType_t taskCount = uxTaskGetNumberOfTasks();
if (taskCount == 0)
continue;
std::vector<TaskStatus_t> statusBuffer(taskCount);
uint32_t totalRuntime = 0;
const UBaseType_t captured = uxTaskGetSystemState(statusBuffer.data(), statusBuffer.size(), &totalRuntime);
if (captured == 0)
continue;
statusBuffer.resize(captured);
std::vector<TaskRuntimeSample> currentSamples;
currentSamples.reserve(statusBuffer.size());
if (lastTotal == 0) {
for (const auto& status: statusBuffer) {
currentSamples.push_back({status.xHandle, status.ulRunTimeCounter});
}
lastSamples = std::move(currentSamples);
lastTotal = totalRuntime;
continue;
}
const uint64_t totalDelta = deltaWithWrap(totalRuntime, lastTotal);
if (totalDelta == 0)
continue;
std::vector<TaskUsageRow> rows;
rows.reserve(statusBuffer.size());
uint64_t idleDelta = 0;
uint64_t activeDelta = 0;
uint64_t accountedDelta = 0;
for (const auto& status: statusBuffer) {
const auto it = std::find_if(lastSamples.begin(), lastSamples.end(), [&](const TaskRuntimeSample& entry) {
return entry.handle == status.xHandle;
});
const uint32_t previousRuntime = (it != lastSamples.end()) ? it->runtime : status.ulRunTimeCounter;
const uint64_t taskDelta = (it != lastSamples.end()) ? deltaWithWrap(status.ulRunTimeCounter, previousRuntime) : 0ULL;
currentSamples.push_back({status.xHandle, status.ulRunTimeCounter});
TaskUsageRow row{
.name = std::string(status.pcTaskName ? status.pcTaskName : ""),
.delta = taskDelta,
.priority = status.uxCurrentPriority,
.stackHighWaterBytes = static_cast<uint32_t>(status.usStackHighWaterMark) * sizeof(StackType_t),
.isIdle = status.uxCurrentPriority == tskIDLE_PRIORITY ||
(status.pcTaskName && std::strncmp(status.pcTaskName, "IDLE", 4) == 0)
};
rows.push_back(std::move(row));
accountedDelta += taskDelta;
if (rows.back().isIdle)
idleDelta += taskDelta;
else
activeDelta += taskDelta;
}
lastSamples = std::move(currentSamples);
lastTotal = totalRuntime;
if (rows.empty())
continue;
std::sort(rows.begin(), rows.end(), [](const TaskUsageRow& a, const TaskUsageRow& b) {
return a.delta > b.delta;
});
const double windowMs = static_cast<double>(totalDelta) / 1000.0;
std::printf("\n[%s] CPU usage over %.1f ms window\n", tag, windowMs);
for (const auto& row: rows) {
if (row.delta == 0 || row.isIdle)
continue;
const double pct = (static_cast<double>(row.delta) * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (prio=%u stack_free=%lu B)\n", row.name.c_str(), pct, row.priority,
static_cast<unsigned long>(row.stackHighWaterBytes));
}
const double idlePct = (idleDelta * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (aggregated idle)\n", "<idle>", idlePct);
const uint64_t residual = (accountedDelta >= totalDelta) ? 0ULL : (totalDelta - accountedDelta);
if (residual > 0) {
const double residualPct = (static_cast<double>(residual) * 100.0) / static_cast<double>(totalDelta);
std::printf(" %-16s %6.2f%% (ISRs / scheduler)\n", "<isr>", residualPct);
}
std::printf("[%s] Active %.2f%% | Idle %.2f%%\n", tag,
(activeDelta * 100.0) / static_cast<double>(totalDelta), idlePct);
std::fflush(stdout);
}
}
void start_task_usage_monitor() {
xTaskCreatePinnedToCore(task_usage_monitor, kStatsTaskName, kStatsTaskStack, nullptr, kStatsTaskPriority, nullptr, 0);
}
} // namespace
#else
inline void start_task_usage_monitor() {}
#endif
extern "C" void app_main() {
#ifdef CONFIG_PM_ENABLE
// const esp_pm_config_t pm_config = {
@@ -71,5 +226,7 @@ extern "C" void app_main() {
system.registerApp(apps::createTetrisAppFactory());
system.registerApp(apps::createGameboyAppFactory());
start_task_usage_monitor();
system.run();
}

View File

@@ -27,6 +27,18 @@
#include <sys/stat.h>
#include <vector>
#define GAMEBOY_PERF_METRICS 0
#ifndef GAMEBOY_PERF_METRICS
#define GAMEBOY_PERF_METRICS 1
#endif
#if GAMEBOY_PERF_METRICS
#define GB_PERF_ONLY(...) __VA_ARGS__
#else
#define GB_PERF_ONLY(...)
#endif
namespace apps {
namespace {
@@ -161,7 +173,8 @@ public:
void onStart() override {
cancelTick();
perf.resetAll();
frameDelayCarryUs = 0;
GB_PERF_ONLY(perf.resetAll();)
prevInput = {};
statusMessage.clear();
resetFpsStats();
@@ -176,46 +189,52 @@ public:
void onStop() override {
cancelTick();
perf.maybePrintAggregate(true);
frameDelayCarryUs = 0;
GB_PERF_ONLY(perf.maybePrintAggregate(true);)
unloadRom();
}
void handleEvent(const AppEvent& event) override {
if (event.type == AppEventType::Timer && event.timer.handle == tickTimer) {
tickTimer = kInvalidAppTimer;
const uint64_t frameStartUs = esp_timer_get_time();
performStep();
scheduleNextTick(nextDelayMs());
const uint64_t frameEndUs = esp_timer_get_time();
const uint64_t elapsedUs = (frameEndUs >= frameStartUs) ? (frameEndUs - frameStartUs) : 0;
printf("Step took %" PRIu64 " us\n", elapsedUs);
scheduleAfterFrame(elapsedUs);
return;
}
if (event.type == AppEventType::Button) {
frameDelayCarryUs = 0;
scheduleNextTick(0);
}
}
void performStep() {
perf.resetForStep();
GB_PERF_ONLY(perf.resetForStep();)
const uint64_t inputStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t inputStartUs = esp_timer_get_time();)
const InputState input = context.input.readState();
perf.inputUs = esp_timer_get_time() - inputStartUs;
GB_PERF_ONLY(perf.inputUs = esp_timer_get_time() - inputStartUs;)
const Mode stepMode = mode;
switch (stepMode) {
case Mode::Browse: {
const uint64_t handleStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();)
handleBrowserInput(input);
perf.handleUs = esp_timer_get_time() - handleStartUs;
GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;)
const uint64_t renderStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();)
renderBrowser();
perf.renderUs = esp_timer_get_time() - renderStartUs;
GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;)
break;
}
case Mode::Running: {
const uint64_t handleStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t handleStartUs = esp_timer_get_time();)
handleGameInput(input);
perf.handleUs = esp_timer_get_time() - handleStartUs;
GB_PERF_ONLY(perf.handleUs = esp_timer_get_time() - handleStartUs;)
if (!gbReady) {
mode = Mode::Browse;
@@ -223,32 +242,28 @@ public:
break;
}
const uint64_t geometryStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t geometryStartUs = esp_timer_get_time();)
ensureRenderGeometry();
perf.geometryUs = esp_timer_get_time() - geometryStartUs;
GB_PERF_ONLY(perf.geometryUs = esp_timer_get_time() - geometryStartUs;)
const uint64_t waitStartUs = esp_timer_get_time();
DispTools::draw_to_display_async_wait();
perf.waitUs = esp_timer_get_time() - waitStartUs;
const uint64_t runStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t runStartUs = esp_timer_get_time();)
gb_run_frame(&gb);
perf.runUs = esp_timer_get_time() - runStartUs;
GB_PERF_ONLY(perf.runUs = esp_timer_get_time() - runStartUs;)
const uint64_t renderStartUs = esp_timer_get_time();
GB_PERF_ONLY(const uint64_t renderStartUs = esp_timer_get_time();)
renderGameFrame();
perf.renderUs = esp_timer_get_time() - renderStartUs;
GB_PERF_ONLY(perf.renderUs = esp_timer_get_time() - renderStartUs;)
break;
}
}
prevInput = input;
perf.finishStep();
perf.accumulate();
perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex,
browserDirty);
perf.maybePrintAggregate();
GB_PERF_ONLY(perf.finishStep();)
GB_PERF_ONLY(perf.accumulate();)
GB_PERF_ONLY(perf.printStep(stepMode, gbReady, frameDirty, fpsCurrent, activeRomName, roms.size(), selectedIndex,
browserDirty);)
GB_PERF_ONLY(perf.maybePrintAggregate();)
}
private:
@@ -458,22 +473,34 @@ private:
class ScopedCallbackTimer {
public:
ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) : app(instance), kind(kind) {
if (app)
startUs = esp_timer_get_time();
ScopedCallbackTimer(GameboyApp* instance, PerfTracker::CallbackKind kind) {
#if GAMEBOY_PERF_METRICS
if (instance) {
app = instance;
cbKind = kind;
startUs = esp_timer_get_time();
}
#else
(void)instance;
(void)kind;
#endif
}
~ScopedCallbackTimer() {
#if GAMEBOY_PERF_METRICS
if (!app)
return;
const uint64_t end = esp_timer_get_time();
app->perf.addCallback(kind, end - startUs);
app->perf.addCallback(cbKind, end - startUs);
#endif
}
private:
GameboyApp* app;
PerfTracker::CallbackKind kind;
#if GAMEBOY_PERF_METRICS
GameboyApp* app = nullptr;
PerfTracker::CallbackKind cbKind{};
uint64_t startUs = 0;
#endif
};
struct RenderGeometry {
@@ -491,10 +518,12 @@ private:
std::array<int, LCD_WIDTH> colXEnd{};
};
AppContext& context;
Framebuffer& framebuffer;
PerfTracker perf{};
AppContext& context;
Framebuffer& framebuffer;
PerfTracker perf{};
AppTimerHandle tickTimer = kInvalidAppTimer;
int64_t frameDelayCarryUs = 0;
static constexpr uint32_t kTargetFrameUs = 1000000 / 60; // ~16.6 ms
Mode mode = Mode::Browse;
ScaleMode scaleMode = ScaleMode::Original;
@@ -532,10 +561,25 @@ private:
tickTimer = context.scheduleTimer(delayMs, false);
}
uint32_t nextDelayMs() const {
if (mode == Mode::Running)
return 0;
return browserDirty ? 50 : 140;
uint32_t idleDelayMs() const { return browserDirty ? 50 : 140; }
void scheduleAfterFrame(uint64_t elapsedUs) {
if (mode == Mode::Running && gbReady) {
int64_t desiredUs = static_cast<int64_t>(kTargetFrameUs) - static_cast<int64_t>(elapsedUs);
desiredUs += frameDelayCarryUs;
if (desiredUs <= 0) {
frameDelayCarryUs = desiredUs;
scheduleNextTick(0);
return;
}
frameDelayCarryUs = desiredUs % 1000;
desiredUs -= frameDelayCarryUs;
uint32_t delayMs = static_cast<uint32_t>(desiredUs / 1000);
scheduleNextTick(delayMs);
return;
}
frameDelayCarryUs = 0;
scheduleNextTick(idleDelayMs());
}
bool ensureFilesystemReady() {
@@ -1216,6 +1260,10 @@ private:
Framebuffer& fb = self->framebuffer;
GB_PERF_ONLY(const uint64_t waitStartUs = esp_timer_get_time();)
DispTools::draw_to_display_async_wait();
GB_PERF_ONLY(perf.waitUs = esp_timer_get_time() - waitStartUs;)
if (geom.scaledWidth == LCD_WIDTH && geom.scaledHeight == LCD_HEIGHT) {
const int dstY = yStart;
const int dstXBase = geom.offsetX;

View File

@@ -51,7 +51,6 @@ extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* /*t*/) {
static void clear_task(void*) {
for (;;) {
if (xSemaphoreTake(s_clearReqSem, portMAX_DELAY) == pdTRUE) {
printf("Started zeroing\n");
spi_transaction_t* r = nullptr;
ESP_ERROR_CHECK(spi_device_get_trans_result(SMD::_spi, &r, 0));
zero_framebuffer_payload();
@@ -111,11 +110,14 @@ void SMD::async_draw_wait() {
// assert((uxSemaphoreGetCount(s_clearSem) == 0) == _inFlight);
return;
}
auto now = esp_timer_get_time();
if (!xSemaphoreTake(s_clearSem, portMAX_DELAY))
assert(false);
if (!xSemaphoreGive(s_clearSem))
assert(false);
assert(!_inFlight);
auto elapsed = esp_timer_get_time() - now;
printf("Waited %" PRIu64 " us\n", elapsed);
}
// (clear_in_progress / wait_clear / request_clear removed from public API)