better gpt tetris 4

This commit is contained in:
2025-10-07 01:14:38 +02:00
parent 4861d26d8a
commit 7df84f1e81
5 changed files with 176 additions and 85 deletions

View File

@@ -20,9 +20,10 @@ public:
void pooler(); // FIXME:
private:
static inline i2c_device_config_t _dev_cfg = {
.dev_addr_length = I2C_ADDR_BIT_LEN_7,
.device_address = 0x36,
.scl_speed_hz = 100000,
.dev_addr_length = I2C_ADDR_BIT_LEN_7,
.device_address = 0x36,
.scl_speed_hz = 100000,
.flags = 0,
};
BatMon();

View File

@@ -41,6 +41,9 @@ namespace DispTools {
}
}
// Blocking frame push: delegates directly to SMD::draw().
static void draw_to_display() {
    SMD::draw();
}
// Begin an asynchronous frame push: delegates directly to SMD::draw_async_start().
static void draw_to_display_async_start() {
    SMD::draw_async_start();
}
// Wait for an asynchronous frame push: delegates directly to SMD::draw_async_wait().
static void draw_to_display_async_wait() {
    SMD::draw_async_wait();
}
// Reports whatever SMD::draw_async_busy() reports (its in-flight state).
static bool draw_to_display_async_busy() {
    return SMD::draw_async_busy();
}
};

View File

@@ -25,6 +25,10 @@ DMA_ATTR extern uint8_t dma_buf[SMD::kLineDataBytes];
void init();
void clear();
void draw();
// Asynchronous (DMA queued) draw API
void draw_async_start(); // queue frame transfer if none in flight
void draw_async_wait(); // wait for any in-flight transfer to finish
bool draw_async_busy(); // true if a transfer is in-flight
static void set_pixel(int x, int y, bool value) {
assert(x >= 0 && x < DISP_WIDTH && y >= 0 && y < DISP_HEIGHT);
@@ -49,6 +53,8 @@ static inline spi_device_interface_config_t _devcfg = {
};
extern spi_device_handle_t _spi;
extern bool _vcom;
extern bool _inFlight;
extern spi_transaction_t _tx; // persistent transaction struct for async API
}; // namespace SMD
class SMDSurface : public Surface<SMDSurface, BwPixel>, public StandardEventQueue<SMDSurface> {

View File

@@ -133,9 +133,10 @@ struct PlatformClock final : IClock {
return (uint32_t) ((uint64_t) t * 1000ULL / configTICK_RATE_HZ);
}
void sleep_ms(uint32_t ms) override {
// Pass a longer delay when slow mode is active, cap normal delay for responsiveness
int slow_ms = (int) ms; // allow full requested sleep when slow
int normal_ms = (int) std::min<uint32_t>(ms, 30); // cap active-mode sleep to 30ms
// Unified: request same duration for slow and active. In slow mode a button press wakes early.
// In active mode we accept the full requested sleep (responsiveness governed by recommendedSleepMs cap).
int slow_ms = (int) ms;
int normal_ms = (int) ms; // no artificial cap
PowerHelper::get().delay(slow_ms, normal_ms);
}
};
@@ -144,31 +145,38 @@ static inline uint32_t elapsed_ms(uint32_t a, uint32_t b) { return b - a; }
// High precision performance stats (microsecond accumulation via esp_timer)
struct PerfStats {
uint64_t overlayUs = 0, inputUs = 0, logicUs = 0, renderUs = 0, stepUs = 0; // high-level phases (us)
uint64_t rClearUs = 0, rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0,
rBlitUs = 0; // render sub-phases (us)
uint32_t frames = 0;
// Accumulators (microseconds)
uint64_t overlayUs = 0, inputUs = 0, logicUs = 0, renderUs = 0, stepUs = 0;
uint64_t rClearUs = 0, rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0, rBlitQueueUs = 0, rBlitWaitUs = 0;
// Counters
uint32_t steps = 0; // number of step() iterations during interval
uint32_t renders = 0; // number of actual renders (paintHUD calls) during interval
uint64_t lastPrintUs = 0;
// Last-frame snapshots (microseconds) for immediate per-frame printing
// Last-step / last-render snapshots
uint64_t lastOverlayUs = 0, lastInputUs = 0, lastLogicUs = 0, lastRenderUs = 0, lastStepUs = 0;
uint64_t lastRClearUs = 0, lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0, lastRBlitUs = 0;
bool lastDidRender = false; // whether a render (paintHUD) happened this frame
uint64_t lastRClearUs = 0, lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0;
uint64_t lastRBlitQueueUs = 0, lastRBlitWaitUs = 0;
bool lastDidRender = false;
void maybePrint(uint64_t nowUs) {
if (!lastPrintUs)
lastPrintUs = nowUs;
uint64_t span = nowUs - lastPrintUs;
if (span >= 1000000ULL && frames) {
double f = frames;
auto av = [&](uint64_t v) { return (double) v / 1000.0 / f; }; // convert us->ms per frame
double fps = f * 1000000.0 / span;
printf("PERF overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f "
"ovl=%.3f blt=%.3f] step=%.3fms fps=%.1f\n",
av(overlayUs), av(inputUs), av(logicUs), av(renderUs), av(rClearUs), av(rBoardUs), av(rPiecesUs),
av(rHUDUs), av(rOverlayUs), av(rBlitUs), av(stepUs), fps);
if (span >= 1000000ULL && (steps || renders)) {
double dSteps = steps ? (double) steps : 1.0;
double dRenders = renders ? (double) renders : 1.0;
auto avS = [&](uint64_t v) { return (double) v / 1000.0 / dSteps; };
auto avR = [&](uint64_t v) { return (double) v / 1000.0 / dRenders; };
double fps = renders * 1000000.0 / (double) span; // real display frame rate
printf("PERF steps=%lu frames=%lu span=%.3fs | stepAvg overlay=%.3fms input=%.3fms logic=%.3fms "
"step=%.3fms || renderAvg=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f bltW=%.3f] "
"fps=%.1f\n",
(unsigned long) steps, (unsigned long) renders, (double) span / 1e6, avS(overlayUs), avS(inputUs),
avS(logicUs), avS(stepUs), avR(renderUs), avR(rClearUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs),
avR(rOverlayUs), avR(rBlitQueueUs), avR(rBlitWaitUs), fps);
overlayUs = inputUs = logicUs = renderUs = stepUs = 0;
rClearUs = rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitUs = 0;
frames = 0;
lastPrintUs = nowUs;
rClearUs = rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitQueueUs = rBlitWaitUs = 0;
steps = renders = 0;
lastPrintUs = nowUs;
}
}
static PerfStats& get() {
@@ -471,10 +479,13 @@ public:
uint64_t t6 = tus();
ps.rOverlayUs += (t6 - t5);
ps.lastROverlayUs = (t6 - t5);
DispTools::draw_to_display();
uint64_t t7 = tus();
ps.rBlitUs += (t7 - t6);
ps.lastRBlitUs = (t7 - t6);
// Queue async SPI transfer (DMA) and measure only queue overhead here.
uint64_t bqStart = tus();
DispTools::draw_to_display_async_start();
uint64_t t7 = tus();
uint64_t qdur = (t7 - bqStart);
ps.rBlitQueueUs += qdur; // enqueue overhead
ps.lastRBlitQueueUs = qdur;
}
// Consistent full bordered cell drawing (square regardless of neighbors)
@@ -878,7 +889,17 @@ public:
uint64_t stepStartUs = esp_timer_get_time();
uint32_t now = clock.millis();
PerfStats::get().lastDidRender = false; // reset per-frame render flag
const uint32_t oStart = clock.millis();
// Complete any previous in-flight async display transfer and attribute its duration to blit time
if (DispTools::draw_to_display_async_busy()) {
uint64_t bwStart = esp_timer_get_time();
DispTools::draw_to_display_async_wait();
uint64_t bwEnd = esp_timer_get_time();
auto& ps = PerfStats::get();
uint64_t dur = (bwEnd - bwStart);
ps.rBlitWaitUs += dur; // actual DMA completion time
ps.lastRBlitWaitUs = dur;
}
const uint32_t oStart = clock.millis();
overlayTick(now);
const uint32_t oEnd = clock.millis();
{
@@ -897,6 +918,7 @@ public:
ps.lastInputUs = dur;
}
uint64_t logicStartUs = esp_timer_get_time();
enforceSlowMode();
if (!running) {
bool anyPressed = st.left || st.right || st.down || st.rotate || st.back;
uint32_t sinceGameOver = elapsed_ms(gameOverTime, now);
@@ -928,18 +950,16 @@ public:
ps.stepUs += dur;
ps.lastStepUs = dur;
// Print per-frame breakdown (no render this frame if not dirty)
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
if (ps.lastDidRender) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f "
"hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n",
ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs),
ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs),
ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs));
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
double waitMs = ms(ps.lastRBlitWaitUs);
if (waitMs > 0.0005) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n",
ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs));
} else {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs),
ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs));
}
ps.frames++;
ps.steps++;
ps.maybePrint(stepEndUs);
}
return;
@@ -947,8 +967,7 @@ public:
// Pause toggle
if (st.back && !backPrev) {
paused = !paused;
PowerHelper::get().set_slow(paused);
dirty = true;
dirty = true;
}
backPrev = st.back;
if (paused) {
@@ -967,18 +986,16 @@ public:
uint64_t dur = (stepEndUs - stepStartUs);
ps.stepUs += dur;
ps.lastStepUs = dur;
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
if (ps.lastDidRender) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f "
"hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n",
ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs),
ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs),
ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs));
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
double waitMs = ms(ps.lastRBlitWaitUs);
if (waitMs > 0.0005) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n",
ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs));
} else {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs),
ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs));
}
ps.frames++;
ps.steps++;
ps.maybePrint(stepEndUs);
}
return;
@@ -1020,34 +1037,40 @@ public:
uint64_t dur = (stepEndUs - stepStartUs);
ps.stepUs += dur;
ps.lastStepUs = dur;
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
if (ps.lastDidRender) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f "
"hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n",
ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs),
ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs),
ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs));
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
double waitMs = ms(ps.lastRBlitWaitUs);
if (waitMs > 0.0005) {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs),
ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs));
} else {
printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs),
ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs));
}
ps.frames++;
ps.steps++;
ps.maybePrint(stepEndUs);
}
}
uint32_t recommendedSleepMs(uint32_t now) const {
// Pair of sleep durations, in milliseconds, produced by recommendedSleepMs().
// A plan with both fields equal to zero means "do not sleep at all".
struct SleepPlan {
uint32_t slow_ms; // long sleep allowing battery/UI periodic refresh
uint32_t normal_ms; // short sleep for responsiveness on input wake
};
SleepPlan recommendedSleepMs(uint32_t now) const {
SleepPlan plan{0, 0};
if (dirty)
return 0;
if (paused || !running) {
uint32_t untilOverlay =
(lastOverlayUpd + overlayIntervalMs > now) ? (lastOverlayUpd + overlayIntervalMs - now) : 0;
uint32_t cap = (paused || !running) ? 2000u : 500u;
if (untilOverlay > cap)
untilOverlay = cap;
return untilOverlay;
return plan; // both zero => no sleep
bool idleState = paused || !running;
if (idleState) {
uint32_t interval = overlayIntervalMs * 8; // extended overlay refresh gap
uint32_t since = elapsed_ms(lastOverlayUpd, now);
uint32_t untilOverlay = (since >= interval) ? 0 : (interval - since);
if (untilOverlay > idleLongCapMs)
untilOverlay = idleLongCapMs;
plan.slow_ms = untilOverlay ? untilOverlay : idleShortPollMs;
plan.normal_ms = idleActivePollMs;
return plan;
}
// Estimate next gravity event
// Active gameplay
uint32_t g = score.dropMs;
uint32_t sinceFall = elapsed_ms(lastFall, now);
uint32_t untilDrop = (sinceFall >= g) ? 0 : (g - sinceFall);
@@ -1056,19 +1079,27 @@ public:
uint32_t untilLock = (sinceTouch >= (uint32_t) cfg::LockDelayMs) ? 0 : (cfg::LockDelayMs - sinceTouch);
untilDrop = std::min(untilDrop, untilLock);
}
uint32_t untilOverlay =
(lastOverlayUpd + overlayIntervalMs > now) ? (lastOverlayUpd + overlayIntervalMs - now) : 0;
uint32_t sleep = std::min(untilDrop, untilOverlay);
uint32_t interval = overlayIntervalMs;
uint32_t sinceOverlay = elapsed_ms(lastOverlayUpd, now);
uint32_t untilOverlay = (sinceOverlay >= interval) ? 0 : (interval - sinceOverlay);
uint32_t sleep = std::min(untilDrop, untilOverlay);
if (sleep > maxIdleSleepMs)
sleep = maxIdleSleepMs;
return sleep;
plan.slow_ms = sleep;
plan.normal_ms = std::min<uint32_t>(sleep, activeNormalCapMs);
return plan;
}
private:
// Power-aware overlay throttling
static constexpr uint32_t overlayIntervalMs = 500; // base interval (paused uses multiplier)
static constexpr uint32_t maxIdleSleepMs = 120; // cap to keep input responsive
static constexpr uint32_t maxIdleSleepMs = 120; // active state cap to keep input responsive
static constexpr uint32_t gameOverRestartDelayMs = 2000; // ms grace period before restart allowed
// Idle (paused/game over) timing tunables
static constexpr uint32_t idleLongCapMs = 2500; // longest deep sleep slice while idle (<= overlay 4s)
static constexpr uint32_t idleShortPollMs = 50; // minimal periodic wake to check overlay refresh/input
static constexpr uint32_t idleActivePollMs = 40; // normal_ms provided to PowerHelper for responsiveness
static constexpr uint32_t activeNormalCapMs = 40; // cap for normal_ms during active play
IFramebuffer& fb;
IInput& input;
@@ -1100,6 +1131,14 @@ private:
dirty = true;
}
}
// Keeps PowerHelper's slow mode in sync with the game state: slow mode is
// wanted whenever the game is paused or not running. set_slow() is only
// invoked when the current mode differs from the wanted one.
void enforceSlowMode() {
    auto& power = PowerHelper::get();
    const bool shouldBeSlow = paused || !running;
    if (power.is_slow() != shouldBeSlow) {
        power.set_slow(shouldBeSlow);
    }
}
void paintHUD() {
uint64_t rStartUs = esp_timer_get_time();
uint32_t rStart = clock.millis();
@@ -1126,6 +1165,12 @@ private:
ps.renderUs += dur;
ps.lastRenderUs = dur;
ps.lastDidRender = true;
ps.renders++;
// Pure per-frame render metrics (does not include asynchronous blit wait from previous frame)
auto ms = [](uint64_t us) { return (double) us / 1000.0; };
printf("FRAME render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n",
ms(ps.lastRenderUs), ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs),
ms(ps.lastRHUDUs), ms(ps.lastROverlayUs), ms(ps.lastRBlitQueueUs));
}
dirty = false;
}
@@ -1138,7 +1183,7 @@ private:
paused = false;
touchingGround = false;
rotPrev = lHeld = rHeld = backPrev = false;
PowerHelper::get().set_slow(false);
// slow mode will be re-evaluated centrally on next step
gameOverTime = 0;
gameOverPrevPressed = false;
spawn();
@@ -1157,6 +1202,7 @@ private:
running = false;
gameOverTime = clock.millis();
gameOverPrevPressed = true; // require a release after delay
// slow mode applied centrally next step
}
}
@@ -1237,6 +1283,7 @@ private:
running = false;
gameOverTime = clock.millis();
gameOverPrevPressed = true;
// slow mode applied centrally next step
return;
}
int c = board.clearLines();
@@ -1264,11 +1311,16 @@ public:
~App() { delete game; }
void runForever() {
while (true) {
uint32_t now = clock.millis();
game->step();
uint32_t sleepMs = game->recommendedSleepMs(now);
if (sleepMs) {
clock.sleep_ms(sleepMs);
// Use time AFTER step for more accurate sleep scheduling (prevents fast loops when paused)
uint32_t now = clock.millis();
auto plan = game->recommendedSleepMs(now);
uint32_t sm = plan.slow_ms;
uint32_t nm = plan.normal_ms;
if (sm || nm) {
printf("Sleep slow=%lu normal=%lu %s\n", (unsigned long) sm, (unsigned long) nm,
PowerHelper::get().is_slow() ? "(slow)" : "");
PowerHelper::get().delay((int) sm, (int) nm);
}
}
}
@@ -1285,9 +1337,9 @@ private:
extern "C" void app_main() {
// Configure dynamic frequency scaling & light sleep if enabled
#ifdef CONFIG_PM_ENABLE
// esp_pm_config_t pm_config = {
// .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true};
// ESP_ERROR_CHECK(esp_pm_configure(&pm_config));
esp_pm_config_t pm_config = {
.max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true};
ESP_ERROR_CHECK(esp_pm_configure(&pm_config));
ESP_ERROR_CHECK(esp_sleep_enable_gpio_wakeup());
#endif

View File

@@ -13,7 +13,9 @@
DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{};
spi_device_handle_t SMD::_spi;
bool SMD::_vcom = false;
bool SMD::_vcom = false;
bool SMD::_inFlight = false;
spi_transaction_t SMD::_tx{};
// This solution is attributed to Rich Schroeppel in the Programming Hacks section
// TODO: Why does the device flag not work?
@@ -46,14 +48,41 @@ void SMD::clear() {
}
void SMD::draw() {
_vcom = !_vcom;
spi_transaction_t t{};
// Synchronous (blocking) version retained for compatibility
_vcom = !_vcom;
_tx = {};
_tx.tx_buffer = dma_buf;
_tx.length = SMD::kLineDataBytes * 8;
dma_buf[0] = 0b10000000 | (_vcom << 6);
ESP_ERROR_CHECK(spi_device_transmit(_spi, &_tx));
}
t.tx_buffer = dma_buf;
t.length = SMD::kLineDataBytes * 8;
dma_buf[0] = 0b10000000 | (_vcom << 6);
// True while a transfer queued by draw_async_start() has not yet been
// collected by draw_async_wait().
bool SMD::draw_async_busy() {
    return _inFlight;
}
ESP_ERROR_CHECK(spi_device_transmit(_spi, &t));
// Queues the contents of dma_buf for transmission without waiting for completion.
// Does nothing when a previously queued transfer is still marked in flight.
void SMD::draw_async_start() {
    if (!_inFlight) {
        // Flip VCOM for this frame and place it in bit 6 of the leading command byte.
        _vcom = !_vcom;
        dma_buf[0] = 0b10000000 | (_vcom << 6);

        // Re-arm the persistent transaction descriptor for the whole buffer.
        _tx = {};
        _tx.length = SMD::kLineDataBytes * 8;
        _tx.tx_buffer = dma_buf;

        // Last argument 0: do not wait for a free slot in the driver queue.
        esp_err_t err = spi_device_queue_trans(_spi, &_tx, 0);
        if (err != ESP_OK) {
            ESP_ERROR_CHECK(err);
        } else {
            _inFlight = true;
        }
    }
}
void SMD::draw_async_wait() {
if (!_inFlight)
return;
spi_transaction_t* r = nullptr;
esp_err_t err;
// Wait indefinitely; could add timeout handling if desired
err = spi_device_get_trans_result(_spi, &r, portMAX_DELAY);
ESP_ERROR_CHECK(err);
_inFlight = false;
}
void SMDSurface::draw_pixel_impl(unsigned x, unsigned y, const BwPixel& pixel) {