#!/bin/bash
#
# InferenceX — Expert Profiling
#
# Project tagline: Better output from the same model. Fused computation,
# adaptive precision, surgical expert loading. 305 KB, 19 backends, zero
# dependencies. https://inference-x.com
#
# Tracks which of 384 experts activate per layer per token.
# Output: CSV with columns [token, layer, expert_id, weight]
# Use this to identify essential experts for model pruning.
#
# Usage: <this script> [MODEL] [OUTPUT]
#   MODEL   first argument passed to ./infer_unified (default: ./model.gguf)
#   OUTPUT  value passed to --profile              (default: expert_profile.csv)
#
# Must be run from the directory that contains ./infer_unified, because the
# binary is invoked by a path relative to the current working directory.

# Abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

MODEL=${1:-"./model.gguf"}
OUTPUT=${2:-"expert_profile.csv"}

# Run one fixed-prompt inference with profiling enabled.
# NOTE(review): -p / -n / -t are presumably prompt / token count / temperature;
# confirm against the infer_unified help output.
# The explicit `if` keeps the success messages below from being printed when
# the run fails, and propagates the binary's exit status to the caller.
if ! ./infer_unified "$MODEL" \
  -p "Think step by step about how to build a sustainable desert settlement." \
  -n 20 \
  -t 0.6 \
  --profile "$OUTPUT"; then
  status=$?
  printf 'error: ./infer_unified failed (exit %d); no profile reported for %s\n' \
    "$status" "$OUTPUT" >&2
  exit "$status"
fi

printf '\n'
printf '%s\n' "Profile saved to: $OUTPUT"
printf '%s\n' "Analyze with: python3 analyze_router.py $OUTPUT"