diff --git a/Firmware/main/include/bat_mon.hpp b/Firmware/main/include/bat_mon.hpp index 8814ca4..fd3b85a 100644 --- a/Firmware/main/include/bat_mon.hpp +++ b/Firmware/main/include/bat_mon.hpp @@ -20,9 +20,10 @@ public: void pooler(); // FIXME: private: static inline i2c_device_config_t _dev_cfg = { - .dev_addr_length = I2C_ADDR_BIT_LEN_7, - .device_address = 0x36, - .scl_speed_hz = 100000, + .dev_addr_length = I2C_ADDR_BIT_LEN_7, + .device_address = 0x36, + .scl_speed_hz = 100000, + .flags = 0, }; BatMon(); diff --git a/Firmware/main/include/disp_tools.hpp b/Firmware/main/include/disp_tools.hpp index 8ac06ca..4b0a23a 100644 --- a/Firmware/main/include/disp_tools.hpp +++ b/Firmware/main/include/disp_tools.hpp @@ -41,6 +41,9 @@ namespace DispTools { } } static void draw_to_display() { SMD::draw(); } + static void draw_to_display_async_start() { SMD::draw_async_start(); } + static void draw_to_display_async_wait() { SMD::draw_async_wait(); } + static bool draw_to_display_async_busy() { return SMD::draw_async_busy(); } }; diff --git a/Firmware/main/include/display.hpp b/Firmware/main/include/display.hpp index 8e483c0..8e95269 100644 --- a/Firmware/main/include/display.hpp +++ b/Firmware/main/include/display.hpp @@ -25,6 +25,10 @@ DMA_ATTR extern uint8_t dma_buf[SMD::kLineDataBytes]; void init(); void clear(); void draw(); +// Asynchronous (DMA queued) draw API +void draw_async_start(); // queue frame transfer if none in flight +void draw_async_wait(); // wait for any in-flight transfer to finish +bool draw_async_busy(); // true if a transfer is in-flight static void set_pixel(int x, int y, bool value) { assert(x >= 0 && x < DISP_WIDTH && y >= 0 && y < DISP_HEIGHT); @@ -49,6 +53,8 @@ static inline spi_device_interface_config_t _devcfg = { }; extern spi_device_handle_t _spi; extern bool _vcom; +extern bool _inFlight; +extern spi_transaction_t _tx; // persistent transaction struct for async API }; // namespace SMD class SMDSurface : public Surface, public StandardEventQueue { diff --git a/Firmware/main/src/app_main.cpp b/Firmware/main/src/app_main.cpp index e93f492..124122c 100644 --- a/Firmware/main/src/app_main.cpp +++ b/Firmware/main/src/app_main.cpp @@ -133,9 +133,10 @@ struct PlatformClock final : IClock { return (uint32_t) ((uint64_t) t * 1000ULL / configTICK_RATE_HZ); } void sleep_ms(uint32_t ms) override { - // Pass a longer delay when slow mode is active, cap normal delay for responsiveness - int slow_ms = (int) ms; // allow full requested sleep when slow - int normal_ms = (int) std::min(ms, 30); // cap active-mode sleep to 30ms + // Unified: request same duration for slow and active. In slow mode a button press wakes early. + // In active mode we accept the full requested sleep (responsiveness governed by recommendedSleepMs cap). + int slow_ms = (int) ms; + int normal_ms = (int) ms; // no artificial cap PowerHelper::get().delay(slow_ms, normal_ms); } }; @@ -144,31 +145,38 @@ static inline uint32_t elapsed_ms(uint32_t a, uint32_t b) { return b - a; } // High precision performance stats (microsecond accumulation via esp_timer) struct PerfStats { - uint64_t overlayUs = 0, inputUs = 0, logicUs = 0, renderUs = 0, stepUs = 0; // high-level phases (us) - uint64_t rClearUs = 0, rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0, - rBlitUs = 0; // render sub-phases (us) - uint32_t frames = 0; + // Accumulators (microseconds) + uint64_t overlayUs = 0, inputUs = 0, logicUs = 0, renderUs = 0, stepUs = 0; + uint64_t rClearUs = 0, rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0, rBlitQueueUs = 0, rBlitWaitUs = 0; + // Counters + uint32_t steps = 0; // number of step() iterations during interval + uint32_t renders = 0; // number of actual renders (paintHUD calls) during interval uint64_t lastPrintUs = 0; - // Last-frame snapshots (microseconds) for immediate per-frame printing + // Last-step / last-render snapshots uint64_t lastOverlayUs = 0, lastInputUs = 0, lastLogicUs = 0, lastRenderUs = 0, lastStepUs = 0; - uint64_t lastRClearUs = 0, lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0, lastRBlitUs = 0; - bool lastDidRender = false; // whether a render (paintHUD) happened this frame + uint64_t lastRClearUs = 0, lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0; + uint64_t lastRBlitQueueUs = 0, lastRBlitWaitUs = 0; + bool lastDidRender = false; void maybePrint(uint64_t nowUs) { if (!lastPrintUs) lastPrintUs = nowUs; uint64_t span = nowUs - lastPrintUs; - if (span >= 1000000ULL && frames) { - double f = frames; - auto av = [&](uint64_t v) { return (double) v / 1000.0 / f; }; // convert us->ms per frame - double fps = f * 1000000.0 / span; - printf("PERF overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f " - "ovl=%.3f blt=%.3f] step=%.3fms fps=%.1f\n", - av(overlayUs), av(inputUs), av(logicUs), av(renderUs), av(rClearUs), av(rBoardUs), av(rPiecesUs), - av(rHUDUs), av(rOverlayUs), av(rBlitUs), av(stepUs), fps); + if (span >= 1000000ULL && (steps || renders)) { + double dSteps = steps ? (double) steps : 1.0; + double dRenders = renders ? (double) renders : 1.0; + auto avS = [&](uint64_t v) { return (double) v / 1000.0 / dSteps; }; + auto avR = [&](uint64_t v) { return (double) v / 1000.0 / dRenders; }; + double fps = renders * 1000000.0 / (double) span; // real display frame rate + printf("PERF steps=%lu frames=%lu span=%.3fs | stepAvg overlay=%.3fms input=%.3fms logic=%.3fms " + "step=%.3fms || renderAvg=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f bltW=%.3f] " + "fps=%.1f\n", + (unsigned long) steps, (unsigned long) renders, (double) span / 1e6, avS(overlayUs), avS(inputUs), + avS(logicUs), avS(stepUs), avR(renderUs), avR(rClearUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs), + avR(rOverlayUs), avR(rBlitQueueUs), avR(rBlitWaitUs), fps); overlayUs = inputUs = logicUs = renderUs = stepUs = 0; - rClearUs = rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitUs = 0; - frames = 0; - lastPrintUs = nowUs; + rClearUs = rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitQueueUs = rBlitWaitUs = 0; + steps = renders = 0; + lastPrintUs = nowUs; } } static PerfStats& get() { @@ -471,10 +479,13 @@ public: uint64_t t6 = tus(); ps.rOverlayUs += (t6 - t5); ps.lastROverlayUs = (t6 - t5); - DispTools::draw_to_display(); - uint64_t t7 = tus(); - ps.rBlitUs += (t7 - t6); - ps.lastRBlitUs = (t7 - t6); + // Queue async SPI transfer (DMA) and measure only queue overhead here. + uint64_t bqStart = tus(); + DispTools::draw_to_display_async_start(); + uint64_t t7 = tus(); + uint64_t qdur = (t7 - bqStart); + ps.rBlitQueueUs += qdur; // enqueue overhead + ps.lastRBlitQueueUs = qdur; } // Consistent full bordered cell drawing (square regardless of neighbors) @@ -878,7 +889,17 @@ public: uint64_t stepStartUs = esp_timer_get_time(); uint32_t now = clock.millis(); PerfStats::get().lastDidRender = false; // reset per-frame render flag - const uint32_t oStart = clock.millis(); + // Complete any previous in-flight async display transfer and attribute its duration to blit time + if (DispTools::draw_to_display_async_busy()) { + uint64_t bwStart = esp_timer_get_time(); + DispTools::draw_to_display_async_wait(); + uint64_t bwEnd = esp_timer_get_time(); + auto& ps = PerfStats::get(); + uint64_t dur = (bwEnd - bwStart); + ps.rBlitWaitUs += dur; // actual DMA completion time + ps.lastRBlitWaitUs = dur; + } + const uint32_t oStart = clock.millis(); overlayTick(now); const uint32_t oEnd = clock.millis(); { @@ -897,6 +918,7 @@ public: ps.lastInputUs = dur; } uint64_t logicStartUs = esp_timer_get_time(); + enforceSlowMode(); if (!running) { bool anyPressed = st.left || st.right || st.down || st.rotate || st.back; uint32_t sinceGameOver = elapsed_ms(gameOverTime, now); @@ -928,18 +950,16 @@ public: ps.stepUs += dur; ps.lastStepUs = dur; // Print per-frame breakdown (no render this frame if not dirty) - auto ms = [](uint64_t us) { return (double) us / 1000.0; }; - if (ps.lastDidRender) { - printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f " - "hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n", - ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs), - ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), - ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs)); + auto ms = [](uint64_t us) { return (double) us / 1000.0; }; + double waitMs = ms(ps.lastRBlitWaitUs); + if (waitMs > 0.0005) { + printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n", + ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs)); } else { printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs)); } - ps.frames++; + ps.steps++; ps.maybePrint(stepEndUs); } return; @@ -947,8 +967,7 @@ public: // Pause toggle if (st.back && !backPrev) { paused = !paused; - PowerHelper::get().set_slow(paused); - dirty = true; + dirty = true; } backPrev = st.back; if (paused) { @@ -967,18 +986,16 @@ public: uint64_t dur = (stepEndUs - stepStartUs); ps.stepUs += dur; ps.lastStepUs = dur; - auto ms = [](uint64_t us) { return (double) us / 1000.0; }; - if (ps.lastDidRender) { - printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f " - "hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n", - ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs), - ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), - ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs)); + auto ms = [](uint64_t us) { return (double) us / 1000.0; }; + double waitMs = ms(ps.lastRBlitWaitUs); + if (waitMs > 0.0005) { + printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n", + ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs)); } else { printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs)); } - ps.frames++; + ps.steps++; ps.maybePrint(stepEndUs); } return; @@ -1020,34 +1037,40 @@ public: uint64_t dur = (stepEndUs - stepStartUs); ps.stepUs += dur; ps.lastStepUs = dur; - auto ms = [](uint64_t us) { return (double) us / 1000.0; }; - if (ps.lastDidRender) { - printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f " - "hud=%.3f ovl=%.3f blt=%.3f] step=%.3fms\n", - ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastRenderUs), - ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), - ms(ps.lastROverlayUs), ms(ps.lastRBlitUs), ms(ps.lastStepUs)); + auto ms = [](uint64_t us) { return (double) us / 1000.0; }; + double waitMs = ms(ps.lastRBlitWaitUs); + if (waitMs > 0.0005) { + printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms bltW=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs), + ms(ps.lastInputUs), ms(ps.lastLogicUs), waitMs, ms(ps.lastStepUs)); } else { printf("STEP overlay=%.3fms input=%.3fms logic=%.3fms step=%.3fms\n", ms(ps.lastOverlayUs), ms(ps.lastInputUs), ms(ps.lastLogicUs), ms(ps.lastStepUs)); } - ps.frames++; + ps.steps++; ps.maybePrint(stepEndUs); } } - uint32_t recommendedSleepMs(uint32_t now) const { + struct SleepPlan { + uint32_t slow_ms; // long sleep allowing battery/UI periodic refresh + uint32_t normal_ms; // short sleep for responsiveness on input wake + }; + SleepPlan recommendedSleepMs(uint32_t now) const { + SleepPlan plan{0, 0}; if (dirty) - return 0; - if (paused || !running) { - uint32_t untilOverlay = - (lastOverlayUpd + overlayIntervalMs > now) ? (lastOverlayUpd + overlayIntervalMs - now) : 0; - uint32_t cap = (paused || !running) ? 2000u : 500u; - if (untilOverlay > cap) - untilOverlay = cap; - return untilOverlay; + return plan; // both zero => no sleep + bool idleState = paused || !running; + if (idleState) { + uint32_t interval = overlayIntervalMs * 8; // extended overlay refresh gap + uint32_t since = elapsed_ms(lastOverlayUpd, now); + uint32_t untilOverlay = (since >= interval) ? 0 : (interval - since); + if (untilOverlay > idleLongCapMs) + untilOverlay = idleLongCapMs; + plan.slow_ms = untilOverlay ? untilOverlay : idleShortPollMs; + plan.normal_ms = idleActivePollMs; + return plan; } - // Estimate next gravity event + // Active gameplay uint32_t g = score.dropMs; uint32_t sinceFall = elapsed_ms(lastFall, now); uint32_t untilDrop = (sinceFall >= g) ? 0 : (g - sinceFall); @@ -1056,19 +1079,27 @@ public: uint32_t untilLock = (sinceTouch >= (uint32_t) cfg::LockDelayMs) ? 0 : (cfg::LockDelayMs - sinceTouch); untilDrop = std::min(untilDrop, untilLock); } - uint32_t untilOverlay = - (lastOverlayUpd + overlayIntervalMs > now) ? (lastOverlayUpd + overlayIntervalMs - now) : 0; - uint32_t sleep = std::min(untilDrop, untilOverlay); + uint32_t interval = overlayIntervalMs; + uint32_t sinceOverlay = elapsed_ms(lastOverlayUpd, now); + uint32_t untilOverlay = (sinceOverlay >= interval) ? 0 : (interval - sinceOverlay); + uint32_t sleep = std::min(untilDrop, untilOverlay); if (sleep > maxIdleSleepMs) sleep = maxIdleSleepMs; - return sleep; + plan.slow_ms = sleep; + plan.normal_ms = std::min(sleep, activeNormalCapMs); + return plan; } private: // Power-aware overlay throttling static constexpr uint32_t overlayIntervalMs = 500; // base interval (paused uses multiplier) - static constexpr uint32_t maxIdleSleepMs = 120; // cap to keep input responsive + static constexpr uint32_t maxIdleSleepMs = 120; // active state cap to keep input responsive static constexpr uint32_t gameOverRestartDelayMs = 2000; // ms grace period before restart allowed + // Idle (paused/game over) timing tunables + static constexpr uint32_t idleLongCapMs = 2500; // longest deep sleep slice while idle (<= overlay 4s) + static constexpr uint32_t idleShortPollMs = 50; // minimal periodic wake to check overlay refresh/input + static constexpr uint32_t idleActivePollMs = 40; // normal_ms provided to PowerHelper for responsiveness + static constexpr uint32_t activeNormalCapMs = 40; // cap for normal_ms during active play IFramebuffer& fb; IInput& input; @@ -1100,6 +1131,14 @@ private: dirty = true; } } + void enforceSlowMode() { + auto& ph = PowerHelper::get(); + bool wantSlow = paused || !running; + if (wantSlow && !ph.is_slow()) + ph.set_slow(true); + else if (!wantSlow && ph.is_slow()) + ph.set_slow(false); + } void paintHUD() { uint64_t rStartUs = esp_timer_get_time(); uint32_t rStart = clock.millis(); @@ -1126,6 +1165,12 @@ private: ps.renderUs += dur; ps.lastRenderUs = dur; ps.lastDidRender = true; + ps.renders++; + // Pure per-frame render metrics (does not include asynchronous blit wait from previous frame) + auto ms = [](uint64_t us) { return (double) us / 1000.0; }; + printf("FRAME render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n", + ms(ps.lastRenderUs), ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), + ms(ps.lastRHUDUs), ms(ps.lastROverlayUs), ms(ps.lastRBlitQueueUs)); } dirty = false; } @@ -1138,7 +1183,7 @@ private: paused = false; touchingGround = false; rotPrev = lHeld = rHeld = backPrev = false; - PowerHelper::get().set_slow(false); + // slow mode will be re-evaluated centrally on next step gameOverTime = 0; gameOverPrevPressed = false; spawn(); @@ -1157,6 +1202,7 @@ private: running = false; gameOverTime = clock.millis(); gameOverPrevPressed = true; // require a release after delay + // slow mode applied centrally next step } } @@ -1237,6 +1283,7 @@ private: running = false; gameOverTime = clock.millis(); gameOverPrevPressed = true; + // slow mode applied centrally next step return; } int c = board.clearLines(); @@ -1264,11 +1311,16 @@ public: ~App() { delete game; } void runForever() { while (true) { - uint32_t now = clock.millis(); game->step(); - uint32_t sleepMs = game->recommendedSleepMs(now); - if (sleepMs) { - clock.sleep_ms(sleepMs); + // Use time AFTER step for more accurate sleep scheduling (prevents fast loops when paused) + uint32_t now = clock.millis(); + auto plan = game->recommendedSleepMs(now); + uint32_t sm = plan.slow_ms; + uint32_t nm = plan.normal_ms; + if (sm || nm) { + printf("Sleep slow=%lu normal=%lu %s\n", (unsigned long) sm, (unsigned long) nm, + PowerHelper::get().is_slow() ? "(slow)" : ""); + PowerHelper::get().delay((int) sm, (int) nm); } } } @@ -1285,9 +1337,9 @@ private: extern "C" void app_main() { // Configure dynamic frequency scaling & light sleep if enabled #ifdef CONFIG_PM_ENABLE - // esp_pm_config_t pm_config = { - // .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; - // ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); + esp_pm_config_t pm_config = { + .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; + ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); ESP_ERROR_CHECK(esp_sleep_enable_gpio_wakeup()); #endif diff --git a/Firmware/main/src/display.cpp b/Firmware/main/src/display.cpp index 28bf842..064af49 100644 --- a/Firmware/main/src/display.cpp +++ b/Firmware/main/src/display.cpp @@ -13,7 +13,9 @@ DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{}; spi_device_handle_t SMD::_spi; -bool SMD::_vcom = false; +bool SMD::_vcom = false; +bool SMD::_inFlight = false; +spi_transaction_t SMD::_tx{}; // This solution is attributed to Rich Schroeppel in the Programming Hacks section // TODO: Why does the device flag not work? @@ -46,14 +48,41 @@ void SMD::clear() { } void SMD::draw() { - _vcom = !_vcom; - spi_transaction_t t{}; + // Synchronous (blocking) version retained for compatibility + _vcom = !_vcom; + _tx = {}; + _tx.tx_buffer = dma_buf; + _tx.length = SMD::kLineDataBytes * 8; + dma_buf[0] = 0b10000000 | (_vcom << 6); + ESP_ERROR_CHECK(spi_device_transmit(_spi, &_tx)); +} - t.tx_buffer = dma_buf; - t.length = SMD::kLineDataBytes * 8; - dma_buf[0] = 0b10000000 | (_vcom << 6); +bool SMD::draw_async_busy() { return _inFlight; } - ESP_ERROR_CHECK(spi_device_transmit(_spi, &t)); +void SMD::draw_async_start() { + if (_inFlight) + return; // already in flight + _vcom = !_vcom; + _tx = {}; + _tx.tx_buffer = dma_buf; + _tx.length = SMD::kLineDataBytes * 8; + dma_buf[0] = 0b10000000 | (_vcom << 6); + esp_err_t err = spi_device_queue_trans(_spi, &_tx, 0); + if (err == ESP_OK) + _inFlight = true; + else + ESP_ERROR_CHECK(err); +} + +void SMD::draw_async_wait() { + if (!_inFlight) + return; + spi_transaction_t* r = nullptr; + esp_err_t err; + // Wait indefinitely; could add timeout handling if desired + err = spi_device_get_trans_result(_spi, &r, portMAX_DELAY); + ESP_ERROR_CHECK(err); + _inFlight = false; } void SMDSurface::draw_pixel_impl(unsigned x, unsigned y, const BwPixel& pixel) {