some stuff

This commit is contained in:
2025-10-12 13:51:50 +02:00
parent df57e55171
commit 83ba775971
3 changed files with 291 additions and 33 deletions

4
Firmware/AGENTS.md Normal file
View File

@@ -0,0 +1,4 @@
To build:
(in zsh)
. "$HOME/esp/esp-idf/export.sh"
idf.py build

248
Firmware/ghettoprof.sh Executable file
View File

@@ -0,0 +1,248 @@
#!/usr/bin/env bash
# ghettoprof.sh — turn a log of sampled program-counter values into a per-function
# hit list, resolving addresses in parallel; optionally emit an annotated disassembly.
# Works with LLVM or binutils-style tools, demangles C++ names when a demangler is
# available, and can attribute samples via the symbol table (--no-inlines).
#
# Usage:
#   ./ghettoprof.sh [-j jobs] [--annotate] [--no-inlines] firmware.elf pcs.txt
set -euo pipefail

# Print the command-line synopsis on stdout and terminate the script with status 1.
usage() {
  printf 'Usage: %s [-j jobs] [--annotate] [--no-inlines] firmware.elf pcs.txt\n' "$0"
  exit 1
}
# Option defaults; the parsing loop below overrides them.
ANNOTATE=0    # 1 => also write annotated.S
JOBS=""       # empty => derive the worker count from the CPU count later
NO_INLINES=0  # 1 => attribute samples via the symbol table instead of addr2line names
# ---- args ----
while [[ $# -gt 0 ]]; do
  case "$1" in
    -j)
      # With `set -u`, reading "$2" for a trailing -j would abort with an
      # "unbound variable" error; report a usage problem instead.
      if [[ $# -lt 2 ]]; then
        echo "Option -j requires a job count" >&2
        usage
      fi
      # The value is used in arithmetic later, so insist on a plain decimal number.
      if [[ ! "$2" =~ ^(0|[1-9][0-9]*)$ ]]; then
        echo "Invalid job count for -j: $2" >&2
        usage
      fi
      JOBS="$2"
      shift 2
      ;;
    --annotate)
      ANNOTATE=1
      shift
      ;;
    --no-inlines)
      NO_INLINES=1
      shift
      ;;
    -h|--help)
      usage
      ;;
    --)
      # Explicit end of options: allows positional paths that start with "-".
      shift
      break
      ;;
    -?*)
      # A mistyped option would otherwise be reported as a missing ELF file.
      echo "Unknown option: $1" >&2
      usage
      ;;
    *)
      break
      ;;
  esac
done
# Two positionals are required: the ELF image and the PC sample log.
if [[ $# -lt 2 ]]; then
  usage
fi
ELF="$1"
PCS_IN="$2"
if [[ ! -f "$ELF" ]]; then
  echo "ELF not found: $ELF" >&2
  exit 2
fi
if [[ ! -f "$PCS_IN" ]]; then
  echo "PC log not found: $PCS_IN" >&2
  exit 3
fi
# ---- tools ----
# Each tool is the first candidate found on PATH; an empty variable means "not available".

# addr2line is mandatory: it produces the address -> function mapping.
ADDR2LINE=""
for t in llvm-addr2line eu-addr2line riscv32-esp-elf-addr2line xtensa-esp32-elf-addr2line addr2line; do
  if command -v "$t" >/dev/null 2>&1; then ADDR2LINE="$t"; break; fi
done
if [[ -z "$ADDR2LINE" ]]; then
  echo "No addr2line found" >&2
  exit 4
fi

# objdump is only used for the --annotate output, so a missing objdump is fatal
# only when that output was requested.
OBJDUMP=""
for t in llvm-objdump riscv32-esp-elf-objdump xtensa-esp32-elf-objdump objdump; do
  if command -v "$t" >/dev/null 2>&1; then OBJDUMP="$t"; break; fi
done
if [[ "$ANNOTATE" == "1" && -z "$OBJDUMP" ]]; then
  echo "No objdump found" >&2
  exit 5
fi

# nm is optional: it is only needed for --no-inlines, which falls back when it is absent.
NM=""
for t in llvm-nm nm; do
  if command -v "$t" >/dev/null 2>&1; then NM="$t"; break; fi
done

# A demangler is optional: without one, names are reported as the tools print them.
CPPFILT=""
for t in c++filt llvm-cxxfilt; do
  if command -v "$t" >/dev/null 2>&1; then CPPFILT="$t"; break; fi
done
# ---- cores ----
# Decide how many addr2line workers to run.
#  - No -j given: detect the CPU count and leave one core free (minimum 1).
#  - -j N given: honour N as written (previously it was also reduced by one,
#    so "-j 4" ran only 3 workers).
if [[ -z "$JOBS" ]]; then
  if command -v nproc >/dev/null 2>&1; then
    JOBS=$(nproc)
  elif [[ "$OSTYPE" == "darwin"* ]]; then
    JOBS=$(sysctl -n hw.ncpu 2>/dev/null || echo 4)
  else
    JOBS=$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)
  fi
  # 10# forces base 10 so a value with a leading zero is not parsed as octal.
  if [[ "$JOBS" =~ ^[0-9]+$ ]] && (( 10#$JOBS > 1 )); then
    JOBS=$(( 10#$JOBS - 1 ))
  else
    JOBS=1
  fi
elif [[ "$JOBS" =~ ^[0-9]+$ ]] && (( 10#$JOBS >= 1 )); then
  JOBS=$(( 10#$JOBS ))
else
  echo "WARNING: invalid job count '$JOBS'; using 1 job" >&2
  JOBS=1
fi
echo ">> Using $JOBS parallel jobs"
# Scratch directory for all intermediate files; removed when the script exits.
TMP=$(mktemp -d)
trap 'rm -rf "$TMP"' EXIT
# ---- extract PCs ----
# Pull every 0x-prefixed hex token out of the log (-a: treat binary input as text),
# lower-case the digits and strip leading zeros (keeping at least one digit) so the
# keys have the same shape as the zero-stripped addresses produced when the
# addr2line output is parsed. Without this, a padded PC such as 0x00012345 would
# never join with its symbol and would be counted as <unknown>.
# pc_counts.txt holds "count addr" lines; addrs.txt holds the unique addresses.
# `|| true` keeps a no-match grep from aborting under pipefail, so the friendlier
# check below can report it.
grep -aoE '0x[0-9a-fA-F]+' "$PCS_IN" \
  | tr 'A-F' 'a-f' \
  | sed -E 's/^0x0+([0-9a-f])/0x\1/' \
  | sort \
  | uniq -c >"$TMP/pc_counts.txt" || true
awk '{print $2}' "$TMP/pc_counts.txt" >"$TMP/addrs.txt"
if [[ ! -s "$TMP/addrs.txt" ]]; then
  echo "No addresses found" >&2
  exit 5
fi
# ---- parallel addr2line (live PC -> function to stderr) ----
# Split the unique addresses into files of CHUNK lines and resolve each file in its
# own bash worker, at most $JOBS at a time. Each worker:
#   - runs addr2line -a -f on the addresses of its chunk,
#   - saves the raw output next to the chunk as <chunk>.sym,
#   - echoes "addr<TAB>function" pairs to stderr as progress (demangled when a
#     demangler was found). The awk step assumes three output lines per address.
# The find expression skips *.sym files: workers create them in the same directory
# while find may still be listing it, and a .sym file must never be fed back in as
# a chunk of addresses.
CHUNK=400
split -l "$CHUNK" "$TMP/addrs.txt" "$TMP/chunk."
find "$TMP" -name 'chunk.*' ! -name '*.sym' -type f -print0 \
  | xargs -0 -I{} -n1 -P "$JOBS" bash -c '
    set -euo pipefail
    ADDR2LINE="$1"; ELF="$2"; CHUNK="$3"; CPP="$4"
    OUT="${CHUNK}.sym"
    "$ADDR2LINE" -a -f -e "$ELF" $(cat "$CHUNK") \
      | tee "$OUT" \
      | awk '"'"'NR%3==1{a=$0;next} NR%3==2{f=$0; printf "%s\t%s\n",a,f; next} NR%3==0{next}'"'"' \
      | { if [[ -n "$CPP" ]]; then "$CPP"; else cat; fi; } 1>&2
  ' _ "$ADDR2LINE" "$ELF" {} "$CPPFILT"
# Collate the per-chunk triplets into one stream (glob order = chunk order).
cat "$TMP"/chunk.*.sym > "$TMP/symbols.raw"
# ---- parse 3-line addr/func/file:line ----
# symbols.raw is a sequence of three-line records: address, function, file:line.
# Emit one "addr<TAB>function<TAB>file:line" row per record.
# Leading zeros are stripped from the address so it can be joined against the
# grep-extracted PCs; address zero is kept as "0x0" rather than collapsing to a
# bare "0x", which could never match any key.
awk '
  NR % 3 == 1 {
    addr = $0
    sub(/^0x0+/, "0x", addr)
    if (addr == "0x") addr = "0x0"
    next
  }
  NR % 3 == 2 { fn = $0; next }
  { print addr "\t" fn "\t" $0 }
' "$TMP/symbols.raw" >"$TMP/map.tsv"
# ---- counts: addr -> samplecount ----
# pc_counts.txt is "count addr" (uniq -c); flip it to "addr<TAB>count", sorted on addr for join.
awk '{printf "%s\t%s\n",$2,$1}' "$TMP/pc_counts.txt" | sort -k1,1 >"$TMP/counts.tsv"
# ---- choose mapping: default (addr2line; may show inlined names) vs --no-inlines (symbol-table) ----
# Result: ADDR_FUNC_FILE names a TSV of "addr<TAB>function", sorted on addr for join.
# The addr2line-based file is always built and is the fallback whenever the
# symbol-table variant cannot be produced.
DEFAULT_ADDR_FUNC="$TMP/addr_func.tsv"
cut -f1,2 "$TMP/map.tsv" | sort -k1,1 >"$DEFAULT_ADDR_FUNC"
ADDR_FUNC_FILE="$DEFAULT_ADDR_FUNC"
if [[ "$NO_INLINES" == "1" ]]; then
  if [[ -z "$NM" ]]; then
    echo "WARNING: nm/llvm-nm not found; falling back to inline-aware mapping." >&2
  else
    echo ">> Building symbol table for no-inlines mapping..."
    # Collect text symbols (type T/t) as "hexaddr<TAB>name" from "ADDRESS TYPE NAME" lines.
    if [[ "$NM" == "llvm-nm" ]]; then
      "$NM" -n --defined-only "$ELF" \
        | awk '/ [Tt] /{print $1 "\t" $3}' > "$TMP/syms.raw"
    else
      # Generic nm: tolerate failure and retry without --defined-only for
      # implementations that reject that flag.
      "$NM" -n --defined-only "$ELF" 2>/dev/null \
        | awk '/ [Tt] /{print $1 "\t" $3}' > "$TMP/syms.raw" || true
      if [[ ! -s "$TMP/syms.raw" ]]; then
        "$NM" -n "$ELF" 2>/dev/null | awk '/ [Tt] /{print $1 "\t" $3}' > "$TMP/syms.raw" || true
      fi
    fi
    # Demangle when possible; keep the mangled table if the demangler fails.
    if [[ -n "$CPPFILT" && -s "$TMP/syms.raw" ]]; then
      "$CPPFILT" < "$TMP/syms.raw" > "$TMP/syms.dem.raw" || cp "$TMP/syms.raw" "$TMP/syms.dem.raw"
    else
      cp "$TMP/syms.raw" "$TMP/syms.dem.raw"
    fi
    # Normalise the symbol table:
    #  - split on TAB only, so demangled names containing spaces
    #    (e.g. "foo(int, char)") are kept whole instead of being cut at the first space;
    #  - zero-pad addresses to a fixed width so a byte-wise sort is also a numeric sort.
    awk -F'\t' '
      NF >= 2 && $1 != "" {
        addr = tolower($1)
        sub(/^0x/, "", addr)
        if (addr !~ /^[0-9a-f]+$/) next
        sub(/^0+/, "", addr)
        while (length(addr) < 16) addr = "0" addr
        print addr "\t" $2
      }
    ' "$TMP/syms.dem.raw" \
      | LC_ALL=C sort -s -t $'\t' -k1,1 > "$TMP/syms.tsv"
    if [[ ! -s "$TMP/syms.tsv" ]]; then
      echo "WARNING: no text symbols found; falling back to inline-aware mapping." >&2
    else
      # Map each PC to the *containing* function: the last symbol whose start
      # address is <= PC (symbol sizes are not consulted, so a PC past the end of
      # the last function is still attributed to it).
      # A binary search per PC is used instead of a single merge pass because
      # addrs.txt is sorted as text, which is not numeric order when addresses
      # differ in length.
      # Array indices start at 1 via an explicit pre-increment: an uninitialised
      # awk variable used as a subscript is the empty string, not 0.
      # Addresses are held as awk numbers (doubles): exact up to 2^53.
      awk -F'\t' '
        function hex2num(h,    n, i, d) {
          h = tolower(h)
          sub(/^0x/, "", h)
          n = 0
          for (i = 1; i <= length(h); i++) {
            d = index("0123456789abcdef", substr(h, i, 1)) - 1
            if (d < 0) return -1
            n = n * 16 + d
          }
          return n
        }
        # First file: the sorted symbol table.
        FNR == NR {
          nsym++
          saddr[nsym] = hex2num($1)
          sname[nsym] = $2
          next
        }
        # Second file: one PC per line.
        {
          pn = hex2num($1)
          lo = 1; hi = nsym; best = 0
          while (lo <= hi) {
            mid = int((lo + hi) / 2)
            if (saddr[mid] <= pn) { best = mid; lo = mid + 1 }
            else hi = mid - 1
          }
          if (best > 0)
            printf "%s\t%s\n", $1, sname[best]
          else
            printf "%s\t<unknown>\n", $1
        }
      ' "$TMP/syms.tsv" "$TMP/addrs.txt" \
        | sort -k1,1 > "$TMP/addr_func.noinline.tsv"
      ADDR_FUNC_FILE="$TMP/addr_func.noinline.tsv"
    fi
  fi
fi
# ---- aggregate to hot functions ----
# Join per-address sample counts with the address -> function mapping; addresses
# with no mapping are attributed to "<unknown>".
# Names are demangled (when a demangler exists) *before* the counts are summed, so
# symbols that demangle to the same text are merged into one row instead of
# appearing as several identical-looking entries.
# Ties in the count are ordered by name so the report is reproducible.
join -t $'\t' -a1 -e "<unknown>" -o 1.2,2.2 "$TMP/counts.tsv" "$ADDR_FUNC_FILE" \
  | { if [[ -n "$CPPFILT" ]]; then "$CPPFILT"; else cat; fi; } \
  | awk -F'\t' '{hits[$2] += $1} END {for (name in hits) printf "%8d %s\n", hits[name], name}' \
  | sort -k1,1nr -k2 > "$TMP/hot.txt"
cp "$TMP/hot.txt" hot_functions.txt
echo "=== Top 50 hot functions ==="
head -50 hot_functions.txt
echo "Full list in: hot_functions.txt"
# ---- annotated source+assembly (optional) ----
# With --annotate: disassemble the ELF with interleaved source and line info, then
# prefix every instruction line whose address was sampled with its hit count.
# Output goes to annotated.S in the current directory.
if (( ANNOTATE )); then
  echo ">> Generating annotated source+assembly..."
  # count.map: "addr count" per sampled address.
  awk '{printf "%s %s\n",$2,$1}' "$TMP/pc_counts.txt" >"$TMP/count.map"
  if [[ "$OBJDUMP" == "llvm-objdump" ]]; then
    # Long option spellings are used for llvm-objdump.
    "$OBJDUMP" --source -l --demangle -d "$ELF" >"$TMP/disasm.txt"
  else
    "$OBJDUMP" -S -C -l -d "$ELF" >"$TMP/disasm.txt"
  fi
  # Overlay hit counts onto the disassembly.
  #  - Addresses on both sides are reduced to bare lower-case hex without leading
  #    zeros, so a padded PC in the log still matches the listing.
  #  - The instruction text is taken as everything after the "addr:" prefix.
  #    Searching for the first opcode byte instead could match inside the address
  #    itself and garble the line.
  awk -v counts="$TMP/count.map" '
    function norm(a) {
      a = tolower(a)
      sub(/^0x/, "", a)
      sub(/^0+/, "", a)
      return (a == "" ? "0" : a)
    }
    BEGIN {
      while ((getline entry < counts) > 0) {
        split(entry, col, " ")
        hits[norm(col[1])] += col[2]
      }
      close(counts)
    }
    match($0, /^[[:space:]]*[0-9a-f]+:/) {
      label = substr($0, 1, RLENGTH)
      sub(/^[[:space:]]+/, "", label)
      key = norm(substr(label, 1, length(label) - 1))
      if (key in hits) {
        rest = substr($0, RLENGTH + 1)
        sub(/^[[:space:]]+/, "", rest)
        printf("%-12s %6d | %s\n", label, hits[key], rest)
      } else {
        print
      }
      next
    }
    { print }
  ' "$TMP/disasm.txt" > annotated.S
  echo "Annotated source + assembly written to: annotated.S"
  echo "Tip: less -R annotated.S"
fi

View File

@@ -1,17 +1,18 @@
// Cardboy firmware entry point: boot platform services and run the modular app system. // Cardboy firmware entry point: boot platform services and run the modular app system.
#include "cardboy/backend/esp_backend.hpp"
#include "cardboy/apps/clock_app.hpp" #include "cardboy/apps/clock_app.hpp"
#include "cardboy/apps/gameboy_app.hpp" #include "cardboy/apps/gameboy_app.hpp"
#include "cardboy/apps/menu_app.hpp" #include "cardboy/apps/menu_app.hpp"
#include "cardboy/apps/tetris_app.hpp" #include "cardboy/apps/tetris_app.hpp"
#include "cardboy/backend/esp_backend.hpp"
#include "cardboy/sdk/app_system.hpp" #include "cardboy/sdk/app_system.hpp"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_err.h" #include "esp_err.h"
#include "esp_pm.h" #include "esp_pm.h"
#include "esp_sleep.h" #include "esp_sleep.h"
#include "esp_system.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "sdkconfig.h" #include "sdkconfig.h"
#include <algorithm> #include <algorithm>
@@ -19,9 +20,9 @@
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <span>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <span>
#include <vector> #include <vector>
namespace { namespace {
@@ -53,11 +54,11 @@ constexpr apps::EmbeddedRomDescriptor kEmbeddedRoms[] = {
#if CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS && CONFIG_FREERTOS_USE_TRACE_FACILITY #if CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS && CONFIG_FREERTOS_USE_TRACE_FACILITY
namespace { namespace {
constexpr TickType_t kStatsTaskDelayTicks = pdMS_TO_TICKS(5000); constexpr TickType_t kStatsTaskDelayTicks = pdMS_TO_TICKS(5000);
constexpr TickType_t kStatsWarmupDelay = pdMS_TO_TICKS(2000); constexpr TickType_t kStatsWarmupDelay = pdMS_TO_TICKS(2000);
constexpr UBaseType_t kStatsTaskPriority = tskIDLE_PRIORITY + 1; constexpr UBaseType_t kStatsTaskPriority = tskIDLE_PRIORITY + 1;
constexpr uint32_t kStatsTaskStack = 4096; constexpr uint32_t kStatsTaskStack = 4096;
constexpr char kStatsTaskName[] = "TaskStats"; constexpr char kStatsTaskName[] = "TaskStats";
struct TaskRuntimeSample { struct TaskRuntimeSample {
TaskHandle_t handle; TaskHandle_t handle;
@@ -65,11 +66,11 @@ struct TaskRuntimeSample {
}; };
struct TaskUsageRow { struct TaskUsageRow {
std::string name; std::string name;
uint64_t delta; uint64_t delta;
UBaseType_t priority; UBaseType_t priority;
uint32_t stackHighWaterBytes; uint32_t stackHighWaterBytes;
bool isIdle; bool isIdle;
}; };
[[nodiscard]] uint64_t deltaWithWrap(uint32_t current, uint32_t previous) { [[nodiscard]] uint64_t deltaWithWrap(uint32_t current, uint32_t previous) {
@@ -79,7 +80,7 @@ struct TaskUsageRow {
} }
void task_usage_monitor(void*) { void task_usage_monitor(void*) {
static constexpr char tag[] = "TaskUsage"; static constexpr char tag[] = "TaskUsage";
std::vector<TaskRuntimeSample> lastSamples; std::vector<TaskRuntimeSample> lastSamples;
uint32_t lastTotal = 0; uint32_t lastTotal = 0;
@@ -94,7 +95,7 @@ void task_usage_monitor(void*) {
std::vector<TaskStatus_t> statusBuffer(taskCount); std::vector<TaskStatus_t> statusBuffer(taskCount);
uint32_t totalRuntime = 0; uint32_t totalRuntime = 0;
const UBaseType_t captured = uxTaskGetSystemState(statusBuffer.data(), statusBuffer.size(), &totalRuntime); const UBaseType_t captured = uxTaskGetSystemState(statusBuffer.data(), statusBuffer.size(), &totalRuntime);
if (captured == 0) if (captured == 0)
continue; continue;
statusBuffer.resize(captured); statusBuffer.resize(captured);
@@ -118,8 +119,8 @@ void task_usage_monitor(void*) {
std::vector<TaskUsageRow> rows; std::vector<TaskUsageRow> rows;
rows.reserve(statusBuffer.size()); rows.reserve(statusBuffer.size());
uint64_t idleDelta = 0; uint64_t idleDelta = 0;
uint64_t activeDelta = 0; uint64_t activeDelta = 0;
uint64_t accountedDelta = 0; uint64_t accountedDelta = 0;
for (const auto& status: statusBuffer) { for (const auto& status: statusBuffer) {
@@ -128,18 +129,18 @@ void task_usage_monitor(void*) {
}); });
const uint32_t previousRuntime = (it != lastSamples.end()) ? it->runtime : status.ulRunTimeCounter; const uint32_t previousRuntime = (it != lastSamples.end()) ? it->runtime : status.ulRunTimeCounter;
const uint64_t taskDelta = (it != lastSamples.end()) ? deltaWithWrap(status.ulRunTimeCounter, previousRuntime) : 0ULL; const uint64_t taskDelta =
(it != lastSamples.end()) ? deltaWithWrap(status.ulRunTimeCounter, previousRuntime) : 0ULL;
currentSamples.push_back({status.xHandle, status.ulRunTimeCounter}); currentSamples.push_back({status.xHandle, status.ulRunTimeCounter});
TaskUsageRow row{ TaskUsageRow row{.name = std::string(status.pcTaskName ? status.pcTaskName : ""),
.name = std::string(status.pcTaskName ? status.pcTaskName : ""), .delta = taskDelta,
.delta = taskDelta, .priority = status.uxCurrentPriority,
.priority = status.uxCurrentPriority, .stackHighWaterBytes =
.stackHighWaterBytes = static_cast<uint32_t>(status.usStackHighWaterMark) * sizeof(StackType_t), static_cast<uint32_t>(status.usStackHighWaterMark) * sizeof(StackType_t),
.isIdle = status.uxCurrentPriority == tskIDLE_PRIORITY || .isIdle = status.uxCurrentPriority == tskIDLE_PRIORITY ||
(status.pcTaskName && std::strncmp(status.pcTaskName, "IDLE", 4) == 0) (status.pcTaskName && std::strncmp(status.pcTaskName, "IDLE", 4) == 0)};
};
rows.push_back(std::move(row)); rows.push_back(std::move(row));
@@ -156,9 +157,8 @@ void task_usage_monitor(void*) {
if (rows.empty()) if (rows.empty())
continue; continue;
std::sort(rows.begin(), rows.end(), [](const TaskUsageRow& a, const TaskUsageRow& b) { std::sort(rows.begin(), rows.end(),
return a.delta > b.delta; [](const TaskUsageRow& a, const TaskUsageRow& b) { return a.delta > b.delta; });
});
const double windowMs = static_cast<double>(totalDelta) / 1000.0; const double windowMs = static_cast<double>(totalDelta) / 1000.0;
@@ -181,14 +181,20 @@ void task_usage_monitor(void*) {
std::printf(" %-16s %6.2f%% (ISRs / scheduler)\n", "<isr>", residualPct); std::printf(" %-16s %6.2f%% (ISRs / scheduler)\n", "<isr>", residualPct);
} }
std::printf("[%s] Active %.2f%% | Idle %.2f%%\n", tag, std::printf("[%s] Active %.2f%% | Idle %.2f%%\n", tag, (activeDelta * 100.0) / static_cast<double>(totalDelta),
(activeDelta * 100.0) / static_cast<double>(totalDelta), idlePct); idlePct);
const uint32_t heapFree = esp_get_free_heap_size();
const uint32_t heapMinimum = esp_get_minimum_free_heap_size();
std::printf("[%s] Heap free %lu B | Min free %lu B\n", tag, static_cast<unsigned long>(heapFree),
static_cast<unsigned long>(heapMinimum));
std::fflush(stdout); std::fflush(stdout);
} }
} }
void start_task_usage_monitor() { void start_task_usage_monitor() {
xTaskCreatePinnedToCore(task_usage_monitor, kStatsTaskName, kStatsTaskStack, nullptr, kStatsTaskPriority, nullptr, 0); xTaskCreatePinnedToCore(task_usage_monitor, kStatsTaskName, kStatsTaskStack, nullptr, kStatsTaskPriority, nullptr,
0);
} }
} // namespace } // namespace