// Patch overview (review annotations; these lines precede the first "diff --git" header and belong to no hunk).
// Changes made by this patch:
//  * disp_tools.hpp: adds DispTools::async_frame_start()/async_frame_end() and keeps the
//    draw_to_display_async_* names as forwards to the renamed SMD::async_draw_* functions.
//  * display.hpp/.cpp: renames SMD::draw_async_* to SMD::async_draw_*, sets s_spi_post_cb as .post_cb in
//    _devcfg, adds the "fbclr" task whose zero_framebuffer_payload() memsets dma_buf to 0xFF and rewrites
//    byte 1 of each line record with reverse_bits3(i + 1), and changes clock_speed_hz from 2 MHz to 6 MHz.
//  * app_main.cpp: drops fb.clear(false) and the rClearUs/lastRClearUs metrics, calls
//    SMD::async_draw_wait() at the top of paintHUD(), and comments out the esp_pm_configure() call.
// NOTE(review) - points to confirm before merging:
//  * zero_framebuffer_payload() fills with 0xFF, not 0x00, despite its name.
//  * async_draw_wait() returns at once when _inFlight is false, and its loop can exit right after harvesting
//    the transfer while _clearBusy is still false; it looks like the caller may then draw before/while
//    clear_task refills dma_buf - verify the intended ordering.
//  * async_draw_start() no longer reports a failed spi_device_queue_trans() (the ESP_ERROR_CHECK was removed).
//  * .post_cb is part of the device config, so presumably s_spi_post_cb also runs for the blocking
//    spi_device_transmit() calls in clear()/draw() - confirm against the ESP-IDF SPI master docs.
//  * _inFlight, _clearBusy, _clearRequested and s_spiDone are plain/volatile bools touched from the callback,
//    clear_task and the caller; volatile provides neither atomicity nor ordering - TODO review.
//  * clear_task calls printf() on a task created with stack size 1536 - confirm this is sufficient.
//  * The text below looks extraction-damaged (line breaks collapsed, "#include" targets and std::array
//    template arguments missing), so the hunks presumably will not apply as-is.
diff --git a/Firmware/main/include/disp_tools.hpp b/Firmware/main/include/disp_tools.hpp index 4b0a23a..cbc890c 100644 --- a/Firmware/main/include/disp_tools.hpp +++ b/Firmware/main/include/disp_tools.hpp @@ -41,9 +41,13 @@ namespace DispTools { } } static void draw_to_display() { SMD::draw(); } - static void draw_to_display_async_start() { SMD::draw_async_start(); } - static void draw_to_display_async_wait() { SMD::draw_async_wait(); } - static bool draw_to_display_async_busy() { return SMD::draw_async_busy(); } + // New simplified async pipeline wrappers + static void async_frame_start() { SMD::async_draw_wait(); } // call at frame start + static void async_frame_end() { SMD::async_draw_start(); } // call after rendering + // Legacy names (temporary) mapped to new API in case of straggling calls + static void draw_to_display_async_start() { SMD::async_draw_start(); } + static void draw_to_display_async_wait() { SMD::async_draw_wait(); } + static bool draw_to_display_async_busy() { return SMD::async_draw_busy(); } }; diff --git a/Firmware/main/include/display.hpp b/Firmware/main/include/display.hpp index 8e95269..8e1018e 100644 --- a/Firmware/main/include/display.hpp +++ b/Firmware/main/include/display.hpp @@ -8,6 +8,7 @@ #include "config.hpp" #include "driver/spi_master.h" +// (Async memcpy removed for debugging simplification) #include #include @@ -25,10 +26,15 @@ DMA_ATTR extern uint8_t dma_buf[SMD::kLineDataBytes]; void init(); void clear(); void draw(); -// Asynchronous (DMA queued) draw API -void draw_async_start(); // queue frame transfer if none in flight -void draw_async_wait(); // wait for any in-flight transfer to finish -bool draw_async_busy(); // true if a transfer is in-flight +// Simplified asynchronous frame pipeline: +// Usage pattern each frame: +// SMD::async_draw_wait(); // (start of frame) waits for previous transfer+clear so the pixel area is reset (0xFF fill, not zeros) +// ... write pixels into dma_buf via set_pixel / surface ... 
+// SMD::async_draw_start(); // (end of frame) queues SPI DMA of current framebuffer; when DMA completes it triggers +// // a background clear of pixel bytes for next frame +void async_draw_start(); +void async_draw_wait(); +bool async_draw_busy(); // optional diagnostic: is a frame transfer still in flight? static void set_pixel(int x, int y, bool value) { assert(x >= 0 && x < DISP_WIDTH && y >= 0 && y < DISP_HEIGHT); @@ -43,18 +49,26 @@ static void set_pixel(int x, int y, bool value) { } } +extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* trans); static inline spi_device_interface_config_t _devcfg = { - .mode = 0, // SPI mode 0 - .clock_speed_hz = 2 * 1000 * 1000, // Clock out at 10 MHz - .spics_io_num = SPI_DISP_CS, // CS pin - .flags = SPI_DEVICE_POSITIVE_CS, - .queue_size = 3, - // .pre_cb = lcd_spi_pre_transfer_callback, //Specify pre-transfer callback to handle D/C line + .mode = 0, // SPI mode 0 + .clock_speed_hz = 6 * 1000 * 1000, // Clock out at 6 MHz + .spics_io_num = SPI_DISP_CS, // CS pin + .flags = SPI_DEVICE_POSITIVE_CS, + .queue_size = 3, + .pre_cb = nullptr, + .post_cb = s_spi_post_cb, }; extern spi_device_handle_t _spi; extern bool _vcom; -extern bool _inFlight; +extern bool _inFlight; // internal state; exposed only for rare diagnostics extern spi_transaction_t _tx; // persistent transaction struct for async API +// Framebuffer-clear state shared by s_spi_post_cb, the background clear task and async_draw_wait() +// (no async memcpy engine is used; see zero_framebuffer_payload() in display.cpp) +// Internal clearing flags (not part of user API anymore) +extern volatile bool _clearBusy; +extern volatile bool _clearRequested; +void ensure_clear_task(); // idempotent; called from init }; // namespace SMD class SMDSurface : public Surface, public StandardEventQueue { diff --git a/Firmware/main/src/app_main.cpp b/Firmware/main/src/app_main.cpp index b7741bd..3d2905f 100644 --- a/Firmware/main/src/app_main.cpp +++ b/Firmware/main/src/app_main.cpp @@ 
-147,14 +147,14 @@ static inline uint32_t elapsed_ms(uint32_t a, 
uint32_t b) { return b - a; } struct PerfStats { // Accumulators (microseconds) uint64_t overlayUs = 0, inputUs = 0, logicUs = 0, renderUs = 0, stepUs = 0; - uint64_t rClearUs = 0, rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0, rBlitQueueUs = 0, rBlitWaitUs = 0; + uint64_t rBoardUs = 0, rPiecesUs = 0, rHUDUs = 0, rOverlayUs = 0, rBlitQueueUs = 0, rBlitWaitUs = 0; // Counters uint32_t steps = 0; // number of step() iterations during interval uint32_t renders = 0; // number of actual renders (paintHUD calls) during interval uint64_t lastPrintUs = 0; // Last-step / last-render snapshots uint64_t lastOverlayUs = 0, lastInputUs = 0, lastLogicUs = 0, lastRenderUs = 0, lastStepUs = 0; - uint64_t lastRClearUs = 0, lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0; + uint64_t lastRBoardUs = 0, lastRPiecesUs = 0, lastRHUDUs = 0, lastROverlayUs = 0; uint64_t lastRBlitQueueUs = 0, lastRBlitWaitUs = 0; bool lastDidRender = false; void maybePrint(uint64_t nowUs) { @@ -168,13 +168,13 @@ struct PerfStats { auto avR = [&](uint64_t v) { return (double) v / 1000.0 / dRenders; }; double fps = renders * 1000000.0 / (double) span; // real display frame rate printf("PERF steps=%lu frames=%lu span=%.3fs | stepAvg overlay=%.3fms input=%.3fms logic=%.3fms " - "step=%.3fms || renderAvg=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f bltW=%.3f] " + "step=%.3fms || renderAvg=%.3fms [brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f bltW=%.3f] " "fps=%.1f\n", (unsigned long) steps, (unsigned long) renders, (double) span / 1e6, avS(overlayUs), avS(inputUs), - avS(logicUs), avS(stepUs), avR(renderUs), avR(rClearUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs), - avR(rOverlayUs), avR(rBlitQueueUs), avR(rBlitWaitUs), fps); + avS(logicUs), avS(stepUs), avR(renderUs), avR(rBoardUs), avR(rPiecesUs), avR(rHUDUs), avR(rOverlayUs), + avR(rBlitQueueUs), avR(rBlitWaitUs), fps); overlayUs = inputUs = logicUs = renderUs = stepUs = 0; - rClearUs = rBoardUs = rPiecesUs 
= rHUDUs = rOverlayUs = rBlitQueueUs = rBlitWaitUs = 0; + rBoardUs = rPiecesUs = rHUDUs = rOverlayUs = rBlitQueueUs = rBlitWaitUs = 0; steps = renders = 0; lastPrintUs = nowUs; } @@ -408,14 +408,10 @@ public: bool ghost, bool paused, bool gameOver, int fps, int avgFrameMs10) { auto& ps = PerfStats::get(); auto tus = []() { return esp_timer_get_time(); }; - uint64_t t0 = tus(); - fb.clear(false); // white background - uint64_t t1 = tus(); - ps.rClearUs += (t1 - t0); - ps.lastRClearUs = (t1 - t0); + uint64_t t0 = tus(); // frame start (no full clear here; buffer is expected to be reset to 0xFF by the post-blit clear task) drawBatteryOverlay(); rect(ox - 2, oy - 2, bw + 4, bh + 4, true); - uint64_t t2 = tus(); + uint64_t t2 = tus(); for (int y = 0; y < cfg::BoardH; ++y) for (int x = 0; x < cfg::BoardW; ++x) { int v = b.get(x, y); @@ -1145,6 +1141,8 @@ private: ph.set_slow(false); } void paintHUD() { + // Complete previous frame transfer & implicit clear so dma_buf pixel area is reset (0xFF) before we draw + SMD::async_draw_wait(); uint64_t rStartUs = esp_timer_get_time(); uint32_t rStart = clock.millis(); // Frame boundary at render start @@ -1173,9 +1171,9 @@ private: ps.renders++; // Pure per-frame render metrics (does not include asynchronous blit wait from previous frame) auto ms = [](uint64_t us) { return (double) us / 1000.0; }; - printf("FRAME render=%.3fms [clr=%.3f brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n", - ms(ps.lastRenderUs), ms(ps.lastRClearUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), - ms(ps.lastRHUDUs), ms(ps.lastROverlayUs), ms(ps.lastRBlitQueueUs)); + printf("FRAME render=%.3fms [brd=%.3f pcs=%.3f hud=%.3f ovl=%.3f bltQ=%.3f]\n", + ms(ps.lastRenderUs), ms(ps.lastRBoardUs), ms(ps.lastRPiecesUs), ms(ps.lastRHUDUs), + ms(ps.lastROverlayUs), ms(ps.lastRBlitQueueUs)); } dirty = false; } @@ -1342,9 +1340,9 @@ private: extern "C" void app_main() { // Configure dynamic frequency scaling & light sleep if enabled #ifdef CONFIG_PM_ENABLE - esp_pm_config_t 
pm_config = { - .max_freq_mhz = 
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; - ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); + // esp_pm_config_t pm_config = { + // .max_freq_mhz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ, .min_freq_mhz = 16, .light_sleep_enable = true}; + // ESP_ERROR_CHECK(esp_pm_configure(&pm_config)); ESP_ERROR_CHECK(esp_sleep_enable_gpio_wakeup()); #endif diff --git a/Firmware/main/src/display.cpp b/Firmware/main/src/display.cpp index 064af49..1a523c7 100644 --- a/Firmware/main/src/display.cpp +++ b/Firmware/main/src/display.cpp @@ -1,35 +1,84 @@ -// -// Created by Stepan Usatiuk on 02.03.2025. -// +// Simplified display implementation (no async memcpy) --------------------------------- #include "display.hpp" - #include - #include -#include "driver/spi_master.h" - #include "disp_tools.hpp" +#include "driver/spi_master.h" +#include "freertos/FreeRTOS.h" +#include "freertos/semphr.h" +#include "freertos/task.h" -DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{}; -spi_device_handle_t SMD::_spi; -bool SMD::_vcom = false; -bool SMD::_inFlight = false; -spi_transaction_t SMD::_tx{}; +DMA_ATTR uint8_t SMD::dma_buf[SMD::kLineDataBytes]{}; +spi_device_handle_t SMD::_spi; +bool SMD::_vcom = false; +bool SMD::_inFlight = false; +spi_transaction_t SMD::_tx{}; +volatile bool SMD::_clearBusy = false; +volatile bool SMD::_clearRequested = false; +static TaskHandle_t s_clearTaskHandle = nullptr; +static SemaphoreHandle_t s_clearSem = nullptr; +static volatile bool s_spiDone = false; -// This solution is attributed to Rich Schroeppel in the Programming Hacks section -// TODO: Why does the device flag not work? 
-unsigned char reverse_bits3(unsigned char b) { return (b * 0x0202020202ULL & 0x010884422010ULL) % 0x3ff; } +static unsigned char reverse_bits3(unsigned char b) { return (b * 0x0202020202ULL & 0x010884422010ULL) % 0x3ff; } + +static void zero_framebuffer_payload() { + std::memset(SMD::dma_buf, 0xFF, SMD::kLineDataBytes); + for (uint8_t i = 0; i < DISP_HEIGHT; i++) { + SMD::dma_buf[SMD::kLineMultiSingle * i + 1] = reverse_bits3(i + 1); + } +} + +extern "C" void IRAM_ATTR s_spi_post_cb(spi_transaction_t* /*t*/) { + s_spiDone = true; + if (!SMD::_clearRequested) + SMD::_clearRequested = true; + if (s_clearSem) { + BaseType_t hpw = pdFALSE; + xSemaphoreGiveFromISR(s_clearSem, &hpw); + if (hpw) + portYIELD_FROM_ISR(); + } +} + +static void clear_task(void*) { + for (;;) { + if (xSemaphoreTake(s_clearSem, portMAX_DELAY) == pdTRUE) { + printf("Started zeroing\n"); + if (s_spiDone && SMD::_inFlight) { + spi_transaction_t* r = nullptr; + if (spi_device_get_trans_result(SMD::_spi, &r, 0) == ESP_OK) { + SMD::_inFlight = false; + } + s_spiDone = false; + } + if (SMD::_clearRequested && !SMD::_clearBusy && !SMD::_inFlight) { + printf("Started zeroing 2\n"); + SMD::_clearBusy = true; + SMD::_clearRequested = false; + zero_framebuffer_payload(); + SMD::_clearBusy = false; + printf("Zeroing done\n"); + } + } + } +} + +void SMD::ensure_clear_task() { + if (!s_clearSem) + s_clearSem = xSemaphoreCreateBinary(); + if (!s_clearTaskHandle) + xTaskCreatePinnedToCore(clear_task, "fbclr", 1536, nullptr, tskIDLE_PRIORITY + 1, &s_clearTaskHandle, 0); +} void SMD::init() { spi_bus_add_device(SPI_BUS, &_devcfg, &_spi); + ensure_clear_task(); ESP_ERROR_CHECK(gpio_reset_pin(SPI_DISP_DISP)); - ESP_ERROR_CHECK(gpio_set_direction(SPI_DISP_DISP, GPIO_MODE_OUTPUT)); ESP_ERROR_CHECK(gpio_set_level(SPI_DISP_DISP, 1)); ESP_ERROR_CHECK(gpio_hold_en(SPI_DISP_DISP)); - for (uint8_t i = 0; i < DISP_HEIGHT; i++) { dma_buf[kLineMultiSingle * i + 1] = reverse_bits3(i + 1); dma_buf[2 + kLineMultiSingle * i + 
kLineBytes] = 0; @@ -41,67 +90,71 @@ void SMD::clear() { std::array buf{}; buf[0] = 0b00100000; spi_transaction_t t{}; - t.tx_buffer = buf.data(); t.length = buf.size() * 8; ESP_ERROR_CHECK(spi_device_transmit(_spi, &t)); } void SMD::draw() { - // Synchronous (blocking) version retained for compatibility - _vcom = !_vcom; - _tx = {}; + _vcom = !_vcom; + _tx = {}; _tx.tx_buffer = dma_buf; _tx.length = SMD::kLineDataBytes * 8; dma_buf[0] = 0b10000000 | (_vcom << 6); ESP_ERROR_CHECK(spi_device_transmit(_spi, &_tx)); } -bool SMD::draw_async_busy() { return _inFlight; } +bool SMD::async_draw_busy() { return _inFlight; } -void SMD::draw_async_start() { +void SMD::async_draw_start() { if (_inFlight) - return; // already in flight - _vcom = !_vcom; - _tx = {}; + return; + _vcom = !_vcom; + _tx = {}; _tx.tx_buffer = dma_buf; _tx.length = SMD::kLineDataBytes * 8; dma_buf[0] = 0b10000000 | (_vcom << 6); - esp_err_t err = spi_device_queue_trans(_spi, &_tx, 0); - if (err == ESP_OK) + if (spi_device_queue_trans(_spi, &_tx, 0) == ESP_OK) _inFlight = true; - else - ESP_ERROR_CHECK(err); } -void SMD::draw_async_wait() { +void SMD::async_draw_wait() { if (!_inFlight) return; - spi_transaction_t* r = nullptr; - esp_err_t err; - // Wait indefinitely; could add timeout handling if desired - err = spi_device_get_trans_result(_spi, &r, portMAX_DELAY); - ESP_ERROR_CHECK(err); - _inFlight = false; + // Wait until SPI DMA completion harvested and buffer cleared + while (_inFlight || _clearBusy) { + // If ISR fired but task hasn't processed result yet, try harvesting here + if (_inFlight && s_spiDone) { + spi_transaction_t* r = nullptr; + if (spi_device_get_trans_result(_spi, &r, 0) == ESP_OK) { + _inFlight = false; + s_spiDone = false; + // Trigger clear immediately if task not yet scheduled + if (!_clearRequested) { + _clearRequested = true; + if (s_clearSem) + xSemaphoreGive(s_clearSem); + } + } + } + vTaskDelay(1); + } } +// (clear_in_progress / wait_clear / request_clear removed from 
public API) + +// Surface implementation ------------------------------------------------------ void SMDSurface::draw_pixel_impl(unsigned x, unsigned y, const BwPixel& pixel) { if (pixel.on) DispTools::set_pixel(x, y); else DispTools::reset_pixel(x, y); } - -void SMDSurface::clear_impl() { DispTools::clear(); } - -int SMDSurface::get_width_impl() const { return DISP_WIDTH; } - -int SMDSurface::get_height_impl() const { return DISP_HEIGHT; } - +void SMDSurface::clear_impl() { DispTools::clear(); } +int SMDSurface::get_width_impl() const { return DISP_WIDTH; } +int SMDSurface::get_height_impl() const { return DISP_HEIGHT; } EventHandlingResult SMDSurface::handle(SurfaceResizeEvent event) { return _window->handle(event); } - SMDSurface::SMDSurface(EventLoop* loop) : Surface(), EventQueue(loop, this) {} - SMDSurface::~SMDSurface() {}