diff --git a/Firmware/main/include/disp_tools.hpp b/Firmware/main/include/disp_tools.hpp index cbc890c..3a01c66 100644 --- a/Firmware/main/include/disp_tools.hpp +++ b/Firmware/main/include/disp_tools.hpp @@ -40,7 +40,6 @@ namespace DispTools { reset_pixel(x, y); } } - static void draw_to_display() { SMD::draw(); } // New simplified async pipeline wrappers static void async_frame_start() { SMD::async_draw_wait(); } // call at frame start static void async_frame_end() { SMD::async_draw_start(); } // call after rendering diff --git a/Firmware/main/include/display.hpp b/Firmware/main/include/display.hpp index 8e1018e..312f5c3 100644 --- a/Firmware/main/include/display.hpp +++ b/Firmware/main/include/display.hpp @@ -21,11 +21,9 @@ static constexpr size_t kLineBytes = DISP_WIDTH / 8; static constexpr size_t kLineMultiSingle = (kLineBytes + 2); static constexpr size_t kLineDataBytes = kLineMultiSingle * DISP_HEIGHT + 2; -DMA_ATTR extern uint8_t dma_buf[SMD::kLineDataBytes]; +extern uint8_t dma_buf[SMD::kLineDataBytes]; void init(); -void clear(); -void draw(); // Simplified asynchronous frame pipeline: // Usage pattern each frame: // SMD::async_draw_wait(); // (start of frame) waits for previous transfer+clear & guarantees pixel area is zeroed @@ -49,26 +47,19 @@ static void set_pixel(int x, int y, bool value) { } } -extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* trans); +extern "C" void s_spi_post_cb(spi_transaction_t* trans); + static inline spi_device_interface_config_t _devcfg = { - .mode = 0, // SPI mode 0 - .clock_speed_hz = 6 * 1000 * 1000, // Clock out at 10 MHz - .spics_io_num = SPI_DISP_CS, // CS pin - .flags = SPI_DEVICE_POSITIVE_CS, - .queue_size = 3, - .pre_cb = nullptr, - .post_cb = s_spi_post_cb, + .mode = 0, // SPI mode 0 + .clock_speed_hz = 6 * 1000 * 1000, // Clock out at 6 MHz + .spics_io_num = SPI_DISP_CS, // CS pin + .flags = SPI_DEVICE_POSITIVE_CS, + .queue_size = 3, + .pre_cb = nullptr, + .post_cb = s_spi_post_cb, }; extern
spi_device_handle_t _spi; -extern bool _vcom; -extern bool _inFlight; // internal state; exposed only for rare diagnostics -extern spi_transaction_t _tx; // persistent transaction struct for async API -// Async memcpy engine handle & clear-in-progress flag -// Async memcpy driver handle (see esp_async_memcpy.h) -// Internal clearing flags (not part of user API anymore) -extern volatile bool _clearBusy; -extern volatile bool _clearRequested; -void ensure_clear_task(); // idempotent; called from init +void ensure_clear_task(); // idempotent; called from init }; // namespace SMD class SMDSurface : public Surface, public StandardEventQueue { diff --git a/Firmware/main/src/app_main.cpp b/Firmware/main/src/app_main.cpp index 3d2905f..425f6c2 100644 --- a/Firmware/main/src/app_main.cpp +++ b/Firmware/main/src/app_main.cpp @@ -171,8 +171,8 @@ struct PerfStats { "step=%.3fms || renderAvg=%.3fms [brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f bltW=%.3f] " "fps=%.1f\n", (unsigned long) steps, (unsigned long) renders, (double) span / 1e6, avS(overlayUs), avS(inputUs), - avS(logicUs), avS(stepUs), avR(renderUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs), avR(rOverlayUs), - avR(rBlitQueueUs), avR(rBlitWaitUs), fps); + avS(logicUs), avS(stepUs), avR(renderUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs), + avR(rOverlayUs), avR(rBlitQueueUs), avR(rBlitWaitUs), fps); overlayUs = inputUs = logicUs = renderUs = stepUs = 0; rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitQueueUs = rBlitWaitUs = 0; steps = renders = 0; @@ -408,10 +408,10 @@ public: bool ghost, bool paused, bool gameOver, int fps, int avgFrameMs10) { auto& ps = PerfStats::get(); auto tus = []() { return esp_timer_get_time(); }; - uint64_t t0 = tus(); // frame start (no full clear; buffer already zeroed by async post-blit) + uint64_t t0 = tus(); // frame start (no full clear; buffer already zeroed by async post-blit) drawBatteryOverlay(); rect(ox - 2, oy - 2, bw + 4, bh + 4, true); - uint64_t t2 = tus(); + uint64_t t2 = 
tus(); for (int y = 0; y < cfg::BoardH; ++y) for (int x = 0; x < cfg::BoardW; ++x) { int v = b.get(x, y); @@ -899,6 +899,8 @@ public: uint64_t dur = (bwEnd - bwStart); ps.rBlitWaitUs += dur; // actual DMA completion time ps.lastRBlitWaitUs = dur; + } else { + PerfStats::get().lastRBlitWaitUs = 0; } const uint32_t oStart = clock.millis(); overlayTick(now); @@ -1093,11 +1095,11 @@ public: private: // Power-aware overlay throttling - static constexpr uint32_t overlayIntervalMs = 500; // base interval (paused uses multiplier) + static constexpr uint32_t overlayIntervalMs = 1000; // base interval (paused uses multiplier) static constexpr uint32_t maxIdleSleepMs = 120; // active state cap to keep input responsive static constexpr uint32_t gameOverRestartDelayMs = 2000; // ms grace period before restart allowed // Idle (paused/game over) timing tunables - static constexpr uint32_t idleLongCapMs = 2500; // longest deep sleep slice while idle (<= overlay 4s) + static constexpr uint32_t idleLongCapMs = 5000; // longest deep sleep slice while idle (NOTE(review): 5000 ms exceeds the "<= overlay 4s" bound stated before - confirm) static constexpr uint32_t idleShortPollMs = 50; // minimal periodic wake to check overlay refresh/input static constexpr uint32_t idleActivePollMs = 40; // normal_ms provided to PowerHelper for responsiveness static constexpr uint32_t activeNormalCapMs = 40; // cap for normal_ms during active play @@ -1171,9 +1173,9 @@ private: ps.renders++; // Pure per-frame render metrics (does not include asynchronous blit wait from previous frame) auto ms = [](uint64_t us) { return (double) us / 1000.0; }; - printf("FRAME render=%.3fms [brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n", - ms(ps.lastRenderUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), - ms(ps.lastROverlayUs), ms(ps.lastRBlitQueueUs)); + printf("FRAME render=%.3fms [brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n", ms(ps.lastRenderUs), + ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), ms(ps.lastROverlayUs), +
ms(ps.lastRBlitQueueUs)); } dirty = false; } @@ -1340,9 +1342,9 @@ private: extern "C" void app_main() { // Configure dynamic frequency scaling & light sleep if enabled #ifdef CONFIG_PM_ENABLE - // esp_pm_config_t pm_config = { - // .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; - // ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); + esp_pm_config_t pm_config = { + .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; + ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); ESP_ERROR_CHECK(esp_sleep_enable_gpio_wakeup()); #endif diff --git a/Firmware/main/src/display.cpp b/Firmware/main/src/display.cpp index 1a523c7..568e8e3 100644 --- a/Firmware/main/src/display.cpp +++ b/Firmware/main/src/display.cpp @@ -5,68 +5,68 @@ #include #include "disp_tools.hpp" #include "driver/spi_master.h" +#include "esp_async_memcpy.h" #include "freertos/FreeRTOS.h" #include "freertos/semphr.h" #include "freertos/task.h" -DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{}; -spi_device_handle_t SMD::_spi; -bool SMD::_vcom = false; -bool SMD::_inFlight = false; -spi_transaction_t SMD::_tx{}; -volatile bool SMD::_clearBusy = false; -volatile bool SMD::_clearRequested = false; -static TaskHandle_t s_clearTaskHandle = nullptr; -static SemaphoreHandle_t s_clearSem = nullptr; -static volatile bool s_spiDone = false; +DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{}; +DMA_ATTR uint8_t dma_buf_template[SMD::kLineDataBytes]{}; + +spi_device_handle_t SMD::_spi; + +static spi_transaction_t _tx{}; +static bool _vcom = false; +volatile bool _inFlight = false; +static TaskHandle_t s_clearTaskHandle = nullptr; +static SemaphoreHandle_t s_clearReqSem = nullptr; +static SemaphoreHandle_t s_clearSem = nullptr; + +static async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG(); +// Async memcpy driver handle; installed from `config` by esp_async_memcpy_install() in SMD::init() +static async_memcpy_handle_t driver = NULL; static unsigned char
reverse_bits3(unsigned char b) { return (b * 0x0202020202ULL & 0x010884422010ULL) % 0x3ff; } + +static bool IRAM_ATTR my_async_memcpy_cb(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t* event, void* cb_args) { + BaseType_t high_task_wakeup = pdFALSE; + _inFlight = false; + xSemaphoreGiveFromISR(s_clearSem, + &high_task_wakeup); // high_task_wakeup set to pdTRUE if some high priority task unblocked + return high_task_wakeup == pdTRUE; +} + static void zero_framebuffer_payload() { - std::memset(SMD::dma_buf, 0xFF, SMD::kLineDataBytes); - for (uint8_t i = 0; i < DISP_HEIGHT; i++) { - SMD::dma_buf[SMD::kLineMultiSingle * i + 1] = reverse_bits3(i + 1); - } + ESP_ERROR_CHECK(esp_async_memcpy(driver, SMD::dma_buf, dma_buf_template, 12480, my_async_memcpy_cb, nullptr)); } extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* /*t*/) { - s_spiDone = true; - if (!SMD::_clearRequested) - SMD::_clearRequested = true; - if (s_clearSem) { - BaseType_t hpw = pdFALSE; - xSemaphoreGiveFromISR(s_clearSem, &hpw); - if (hpw) - portYIELD_FROM_ISR(); - } + BaseType_t hpw = pdFALSE; + xSemaphoreGiveFromISR(s_clearReqSem, &hpw); + if (hpw) + portYIELD_FROM_ISR(); } static void clear_task(void*) { for (;;) { - if (xSemaphoreTake(s_clearSem, portMAX_DELAY) == pdTRUE) { + if (xSemaphoreTake(s_clearReqSem, portMAX_DELAY) == pdTRUE) { printf("Started zeroing\n"); - if (s_spiDone && SMD::_inFlight) { - spi_transaction_t* r = nullptr; - if (spi_device_get_trans_result(SMD::_spi, &r, 0) == ESP_OK) { - SMD::_inFlight = false; - } - s_spiDone = false; - } - if (SMD::_clearRequested && !SMD::_clearBusy && !SMD::_inFlight) { - printf("Started zeroing 2\n"); - SMD::_clearBusy = true; - SMD::_clearRequested = false; - zero_framebuffer_payload(); - SMD::_clearBusy = false; - printf("Zeroing done\n"); - } + spi_transaction_t* r = nullptr; + ESP_ERROR_CHECK(spi_device_get_trans_result(SMD::_spi, &r, 0)); + zero_framebuffer_payload(); + // printf("Zeroing done\n"); } } } void 
SMD::ensure_clear_task() { + if (!s_clearReqSem) + s_clearReqSem = xSemaphoreCreateBinary(); if (!s_clearSem) s_clearSem = xSemaphoreCreateBinary(); + xSemaphoreGive(s_clearSem); + if (!s_clearTaskHandle) xTaskCreatePinnedToCore(clear_task, "fbclr", 1536, nullptr, tskIDLE_PRIORITY + 1, &s_clearTaskHandle, 0); } @@ -84,61 +84,38 @@ void SMD::init() { dma_buf[2 + kLineMultiSingle * i + kLineBytes] = 0; } dma_buf[kLineDataBytes - 1] = 0; -} - -void SMD::clear() { - std::array buf{}; - buf[0] = 0b00100000; - spi_transaction_t t{}; - t.tx_buffer = buf.data(); - t.length = buf.size() * 8; - ESP_ERROR_CHECK(spi_device_transmit(_spi, &t)); -} - -void SMD::draw() { - _vcom = !_vcom; - _tx = {}; - _tx.tx_buffer = dma_buf; - _tx.length = SMD::kLineDataBytes * 8; - dma_buf[0] = 0b10000000 | (_vcom << 6); - ESP_ERROR_CHECK(spi_device_transmit(_spi, &_tx)); + for (int y = 0; y < DISP_HEIGHT; ++y) + for (int x = 0; x < DISP_WIDTH; ++x) + DispTools::set_pixel(x, y, false); + std::memcpy(dma_buf_template, dma_buf, sizeof(dma_buf_template)); + ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &driver)); // install driver with default DMA engine } bool SMD::async_draw_busy() { return _inFlight; } void SMD::async_draw_start() { - if (_inFlight) - return; + assert(!_inFlight); + if (!xSemaphoreTake(s_clearSem, portMAX_DELAY)) + assert(false); _vcom = !_vcom; _tx = {}; _tx.tx_buffer = dma_buf; _tx.length = SMD::kLineDataBytes * 8; dma_buf[0] = 0b10000000 | (_vcom << 6); - if (spi_device_queue_trans(_spi, &_tx, 0) == ESP_OK) - _inFlight = true; + _inFlight = true; + ESP_ERROR_CHECK(spi_device_queue_trans(_spi, &_tx, 0)); } void SMD::async_draw_wait() { - if (!_inFlight) + if (uxSemaphoreGetCount(s_clearSem) || !_inFlight) { + assert((uxSemaphoreGetCount(s_clearSem) == 0) == _inFlight); return; - // Wait until SPI DMA completion harvested and buffer cleared - while (_inFlight || _clearBusy) { - // If ISR fired but task hasn't processed result yet, try harvesting here - if (_inFlight && 
s_spiDone) { - spi_transaction_t* r = nullptr; - if (spi_device_get_trans_result(_spi, &r, 0) == ESP_OK) { - _inFlight = false; - s_spiDone = false; - // Trigger clear immediately if task not yet scheduled - if (!_clearRequested) { - _clearRequested = true; - if (s_clearSem) - xSemaphoreGive(s_clearSem); - } - } - } - vTaskDelay(1); } + if (!xSemaphoreTake(s_clearSem, portMAX_DELAY)) + assert(false); + if (!xSemaphoreGive(s_clearSem)) + assert(false); + assert(!_inFlight); } // (clear_in_progress / wait_clear / request_clear removed from public API)