#!/usr/bin/env bash
# parallel-pc-profile.sh — parallel symbol resolver + optional annotated disassembly
# Supports C++ demangling, LLVM disassembler, and optional no-inlines
# aggregation (symbol-table based).
#
# Usage:
#   ./parallel-pc-profile.sh [-j jobs] [--annotate] [--no-inlines] firmware.elf pcs.txt
#
# Inputs:
#   firmware.elf   ELF image the sampled program counters belong to
#   pcs.txt        any text containing 0x-prefixed hex program counters
#
# Options:
#   -j jobs        number of parallel resolver processes (default: cores - 1)
#   --annotate     also write annotated.S (disassembly with hit counts)
#   --no-inlines   attribute each PC to the containing symbol-table function
#                  instead of the name reported by addr2line
#
# Outputs (current directory):
#   hot_functions.txt   samples per function, hottest first
#   annotated.S         only with --annotate
#
# Exit codes:
#   1 usage, 2 ELF missing, 3 PC log missing, 4 no addr2line found,
#   5 no objdump found (only with --annotate) or no addresses in the log

set -euo pipefail

# Byte-wise collation: sort(1) and join(1) must agree on key order.
export LC_ALL=C

usage() {
  echo "Usage: $0 [-j jobs] [--annotate] [--no-inlines] firmware.elf pcs.txt"
  exit 1
}

# die CODE MESSAGE... — print MESSAGE to stderr and exit with CODE.
die() {
  local code=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$code"
}

# first_available CMD... — print the first CMD found on PATH (nothing if none).
first_available() {
  local candidate
  for candidate in "$@"; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

ANNOTATE=0
JOBS=""
NO_INLINES=0

# ---- args ----
while [[ $# -gt 0 ]]; do
  case "$1" in
    -j)
      # Without this check `set -u` aborts with "unbound variable" on a bare -j.
      [[ $# -ge 2 ]] || usage
      JOBS="$2"
      shift 2
      ;;
    --annotate)
      ANNOTATE=1
      shift
      ;;
    --no-inlines)
      NO_INLINES=1
      shift
      ;;
    -h | --help)
      usage
      ;;
    *)
      break
      ;;
  esac
done

[[ $# -ge 2 ]] || usage
ELF="$1"
PCS_IN="$2"

[[ -f "$ELF" ]] || die 2 "ELF not found: $ELF"
[[ -f "$PCS_IN" ]] || die 3 "PC log not found: $PCS_IN"

# ---- tools ----
ADDR2LINE=$(first_available llvm-addr2line eu-addr2line \
  riscv32-esp-elf-addr2line xtensa-esp32-elf-addr2line addr2line)
[[ -n "$ADDR2LINE" ]] || die 4 "No addr2line found"

OBJDUMP=$(first_available llvm-objdump riscv32-esp-elf-objdump \
  xtensa-esp32-elf-objdump objdump)
# objdump is only needed for --annotate; do not fail a plain run without it.
if (( ANNOTATE )) && [[ -z "$OBJDUMP" ]]; then
  die 5 "No objdump found"
fi

NM=$(first_available llvm-nm nm)
CPPFILT=$(first_available c++filt llvm-cxxfilt)

# ---- cores ----
if [[ -z "$JOBS" ]]; then
  if command -v nproc >/dev/null 2>&1; then
    JOBS=$(nproc)
  elif [[ "${OSTYPE:-}" == darwin* ]]; then
    JOBS=$(sysctl -n hw.ncpu 2>/dev/null || echo 4)
  else
    JOBS=$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)
  fi
  # Auto-detected: leave one core free for the rest of the system.
  JOBS=$(( JOBS > 1 ? JOBS - 1 : 1 ))
else
  # An explicit -j value is honoured as given (minimum 1).
  [[ "$JOBS" =~ ^[0-9]+$ ]] || usage
  JOBS=$(( 10#$JOBS ))
  (( JOBS >= 1 )) || JOBS=1
fi
echo ">> Using $JOBS parallel jobs"

TMP=$(mktemp -d)
trap 'rm -rf "$TMP"' EXIT

# ---- extract PCs ----
# Lower-case and strip leading zeros (keeping one digit) so the same PC
# written as 0x00401000 and 0x401000 is counted once and matches the
# normalised resolver output below. grep exits 1 when nothing matches; that
# case is reported by the emptiness check, not by pipefail.
{ grep -aoE '0x[0-9a-fA-F]+' "$PCS_IN" || true; } \
  | tr 'A-F' 'a-f' \
  | sed -E 's/^0x0+([0-9a-f])/0x\1/' \
  | sort \
  | uniq -c >"$TMP/pc_counts.txt"
awk '{print $2}' "$TMP/pc_counts.txt" >"$TMP/addrs.txt"
[[ -s "$TMP/addrs.txt" ]] || die 5 "No addresses found"

# ---- parallel addr2line (live PC -> function to stderr) ----
CHUNK=400
# -a 4 allows far more chunks than the default two-letter suffix.
split -a 4 -l "$CHUNK" "$TMP/addrs.txt" "$TMP/chunk."

# Worker: resolve one chunk. Writes the raw addr/func/file:line triplets to
# <chunk>.sym and echoes "addr<TAB>func" (demangled if possible) to stderr.
cat >"$TMP/resolve_chunk.sh" <<'EOF'
set -euo pipefail
addr2line=$1
elf=$2
demangler=$3
chunk=$4
# Word splitting is intended: the chunk holds one hex address per line.
# shellcheck disable=SC2046
"$addr2line" -a -f -e "$elf" $(cat "$chunk") \
  | tee "$chunk.sym" \
  | awk 'NR % 3 == 1 { addr = $0; next }
         NR % 3 == 2 { printf "%s\t%s\n", addr, $0 }' \
  | { if [[ -n "$demangler" ]]; then "$demangler"; else cat; fi; } 1>&2
EOF

# Exclude *.sym: workers create them while find may still be scanning.
# -n 1 appends exactly one chunk path as the worker's last argument.
find "$TMP" -type f -name 'chunk.*' ! -name '*.sym' -print0 \
  | xargs -0 -n 1 -P "$JOBS" \
      bash "$TMP/resolve_chunk.sh" "$ADDR2LINE" "$ELF" "$CPPFILT"

# Collate triplets
cat "$TMP"/chunk.*.sym >"$TMP/symbols.raw"

# ---- parse 3-line addr/func/file:line ----
# Normalise addresses the same way as the extracted PCs so the join matches.
awk '
  NR % 3 == 1 {
    addr = tolower($0)
    sub(/^0x0+/, "0x", addr)
    if (addr == "0x") addr = "0x0"
    next
  }
  NR % 3 == 2 { fn = $0; next }
  { print addr "\t" fn "\t" $0 }
' "$TMP/symbols.raw" >"$TMP/map.tsv"

# ---- counts: addr -> samplecount ----
awk '{printf "%s\t%s\n", $2, $1}' "$TMP/pc_counts.txt" \
  | sort -t $'\t' -k1,1 >"$TMP/counts.tsv"

# ---- choose mapping: default (addr2line; may show inlined names)
#      vs --no-inlines (symbol-table) ----
DEFAULT_ADDR_FUNC="$TMP/addr_func.tsv"
cut -f1,2 "$TMP/map.tsv" | sort -t $'\t' -k1,1 >"$DEFAULT_ADDR_FUNC"
ADDR_FUNC_FILE="$DEFAULT_ADDR_FUNC"

if [[ "$NO_INLINES" == "1" ]]; then
  if [[ -z "$NM" ]]; then
    echo "WARNING: nm/llvm-nm not found; falling back to inline-aware mapping." >&2
  else
    echo ">> Building symbol table for no-inlines mapping..."
    # Text symbols as "hexaddr<TAB>name". nm -n emits: ADDRESS TYPE NAME.
    # Some nm builds reject --defined-only, so retry without it.
    "$NM" -n --defined-only "$ELF" 2>/dev/null \
      | awk '/ [Tt] /{print $1 "\t" $3}' >"$TMP/syms.raw" || true
    if [[ ! -s "$TMP/syms.raw" ]]; then
      "$NM" -n "$ELF" \
        | awk '/ [Tt] /{print $1 "\t" $3}' >"$TMP/syms.raw" || true
    fi

    if [[ -n "$CPPFILT" && -s "$TMP/syms.raw" ]]; then
      "$CPPFILT" <"$TMP/syms.raw" >"$TMP/syms.dem.raw" \
        || cp "$TMP/syms.raw" "$TMP/syms.dem.raw"
    else
      cp "$TMP/syms.raw" "$TMP/syms.dem.raw"
    fi

    # Tab-delimited on purpose: demangled names contain spaces
    # (e.g. "foo(int, char)") and must not be cut at the first one.
    # nm addresses are fixed-width hex, so a byte-wise sort is numeric.
    awk -F'\t' 'NF >= 2 && $1 != "" {print $1 "\t" $2}' "$TMP/syms.dem.raw" \
      | sort -t $'\t' -k1,1 >"$TMP/syms.tsv"

    if [[ ! -s "$TMP/syms.tsv" ]]; then
      echo "WARNING: no text symbols found; falling back to inline-aware mapping." >&2
    else
      # Map each PC to the *containing* function: last symbol with
      # addr <= PC. A binary search per PC keeps this correct even though
      # addrs.txt is string-sorted (0x1000 sorts before 0x200).
      awk '
        # Hex string (optional 0x prefix) -> number; -1 if not valid hex.
        function hex2num(h,    n, i, d) {
          h = tolower(h)
          sub(/^0x/, "", h)
          if (h == "") return -1
          n = 0
          for (i = 1; i <= length(h); i++) {
            d = index("0123456789abcdef", substr(h, i, 1)) - 1
            if (d < 0) return -1
            n = n * 16 + d
          }
          return n
        }
        BEGIN { FS = "\t"; nsym = 0 }
        # First file: symbol table, ascending by address.
        FNR == NR {
          v = hex2num($1)
          if (v >= 0) { sym_addr[nsym] = v; sym_name[nsym] = $2; nsym++ }
          next
        }
        # Second file: one PC per line.
        {
          pn = hex2num($1)
          lo = 0; hi = nsym - 1; best = -1
          while (lo <= hi) {
            mid = int((lo + hi) / 2)
            if (sym_addr[mid] <= pn) { best = mid; lo = mid + 1 }
            else hi = mid - 1
          }
          if (pn >= 0 && best >= 0) printf "%s\t%s\n", $1, sym_name[best]
          else printf "%s\t<unknown>\n", $1
        }
      ' "$TMP/syms.tsv" "$TMP/addrs.txt" \
        | sort -t $'\t' -k1,1 >"$TMP/addr_func.noinline.tsv"
      ADDR_FUNC_FILE="$TMP/addr_func.noinline.tsv"
    fi
  fi
fi

# ---- aggregate to hot functions ----
# -a1 keeps PCs with no mapping; they are summed under an empty name.
join -t $'\t' -a1 -e "" -o 1.2,2.2 "$TMP/counts.tsv" "$ADDR_FUNC_FILE" \
  | awk -F'\t' '{s[$2]+=$1} END{for(k in s) printf "%8d %s\n",s[k],k}' \
  | sort -nr >"$TMP/hot.txt"

# ---- demangle final hot list (if available) ----
if [[ -n "$CPPFILT" ]]; then
  "$CPPFILT" <"$TMP/hot.txt" >hot_functions.txt
else
  cp "$TMP/hot.txt" hot_functions.txt
fi

echo "=== Top 50 hot functions ==="
head -50 hot_functions.txt
echo "Full list in: hot_functions.txt"

# ---- annotated source+assembly (optional) ----
if (( ANNOTATE )); then
  echo ">> Generating annotated source+assembly..."
  awk '{printf "%s %s\n",$2,$1}' "$TMP/pc_counts.txt" >"$TMP/count.map"

  if [[ "$OBJDUMP" == "llvm-objdump" ]]; then
    # Portable across llvm-objdump versions
    "$OBJDUMP" --source -l --demangle -d "$ELF" >"$TMP/disasm.txt"
  else
    "$OBJDUMP" -S -C -l -d "$ELF" >"$TMP/disasm.txt"
  fi

  # Overlay hit counts onto the disassembly
  awk -v counts="$TMP/count.map" '
    BEGIN {
      while ((getline line < counts) > 0) {
        split(line, kv, " ")
        key = kv[1]
        sub(/^0x/, "", key)
        hits[key] = kv[2]
      }
      close(counts)
    }
    /^[[:space:]]*[0-9a-f]+:/ {
      key = $1
      sub(/:.*$/, "", key)
      # objdump may zero-pad; the PC keys never are.
      sub(/^0+/, "", key)
      if (key == "") key = "0"
      if (key in hits) {
        # Strip the address column explicitly; searching for $2 could
        # match the opcode bytes inside the address itself.
        rest = $0
        sub(/^[[:space:]]*[0-9a-f]+:[[:space:]]*/, "", rest)
        printf("%-12s %6d | %s\n", $1, hits[key], rest)
      } else {
        print $0
      }
      next
    }
    { print }
  ' "$TMP/disasm.txt" >annotated.S

  echo "Annotated source + assembly written to: annotated.S"
  echo "Tip: less -R annotated.S"
fi