inference-x/examples/ix.sh
Salka Elmadani ec36668cf5 Inference-X v1.0 — Universal AI Inference Engine
Better output from the same model. Fused computation, adaptive precision,
surgical expert loading. 305 KB, 19 backends, zero dependencies.

https://inference-x.com
2026-02-23 07:10:47 +00:00

165 lines
9.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# ix — Inference-X Model Hub & Benchmark
# Salka Elmadani | Morocco
#
# Environment overrides:
#   IX  — path to the inference-x binary (default ./inference-x)
#   HUB — local model directory          (default ./models)
#   RES — benchmark results directory    (default ./benchmarks)
#
# No 'set -e': exit codes of timeout/wget are inspected explicitly below.
set -uo pipefail
IX="${IX:-./inference-x}"
HUB="${HUB:-./models}"
RES="${RES:-./benchmarks}"
mkdir -p "$HUB" "$RES"
# Host facts for banners and RAM-fit checks. Fall back to safe defaults where
# /proc lacks these fields (e.g. ARM cpuinfo has no "model name" line).
CPU=$(grep -m1 "model name" /proc/cpuinfo 2>/dev/null | sed 's/.*: *//; s/\s\+/ /g')
CPU="${CPU:-unknown}"
RAM_GB=$(awk '/MemTotal/ {printf "%.0f", $2/1024/1024}' /proc/meminfo 2>/dev/null)
RAM_GB="${RAM_GB:-0}"
CORES=$(nproc 2>/dev/null || echo 1)
# find_model <filename>
# Search the hub directory and ~/models for a model file.
# Prints the first match on stdout and returns 0; returns 1 if not found.
# (The original listed $HOME/models twice and left it unquoted; fixed.)
find_model() {
  local fn="$1" d
  for d in "$HUB" "$HOME/models"; do
    [[ -f "$d/$fn" ]] && { printf '%s\n' "$d/$fn"; return 0; }
  done
  return 1
}
# bench_one <name> <file> <size_gb> <params> <quant> [ntok]
# Run one timed generation with $IX, classify the result, print a table row,
# and append a CSV line to $RES/results.csv.
# Quality codes: OK (tokens generated), TIMEOUT (600s), CRASH, GARB (output
# contains no real words), FAIL.
bench_one() {
  local name="$1" fn="$2" size="$3" params="$4" quant="$5" ntok="${6:-4}"
  local path
  path=$(find_model "$fn") || true
  [[ -z "$path" ]] && printf " %-20s NOT FOUND\n" "$name" && return 1
  # Drop the page cache for a cold-start measurement. Needs root; grouping the
  # redirection keeps the permission-denied noise out of the output when not.
  sync
  { echo 3 > /proc/sys/vm/drop_caches; } 2>/dev/null || true
  local log="$RES/${name}.log"
  local t0 t1 rc
  t0=$(date +%s%N)
  timeout 600 "$IX" "$path" --raw -p "The capital of France is" -n "$ntok" -t 0.1 > "$log" 2>&1
  rc=$?
  t1=$(date +%s%N)
  local ms=$(( (t1 - t0) / 1000000 ))
  local secs gen output
  secs=$(echo "scale=1; $ms / 1000" | bc 2>/dev/null || echo "?")
  # First "[GEN] <n>" marker only, so $gen is always a single integer.
  gen=$(grep -m1 -oP '\[GEN\] \K\d+' "$log" 2>/dev/null || echo "0")
  # Text between the OUTPUT header and the next ──── rule, flattened to 60 chars.
  output=$(awk '/OUTPUT/{f=1;next} /────/{if(f)exit} f' "$log" | tr '\n' ' ' | sed 's/^[[:space:]]*//' | head -c 60)
  local tps="0"
  [[ "$gen" -gt 0 && "$ms" -gt 0 ]] && tps=$(echo "scale=2; $gen * 1000 / $ms" | bc 2>/dev/null || echo "0")
  # Classify: later checks deliberately override earlier ones.
  local q="FAIL"
  [[ $rc -eq 124 ]] && q="TIMEOUT"              # timeout(1) exits 124
  [[ $rc -ne 0 && $rc -ne 124 ]] && q="CRASH"
  [[ "$gen" -gt 0 ]] && q="OK"
  # No run of 2+ letters anywhere => garbage output.
  echo "$output" | grep -qiP '[a-z]{2,}' || q="GARB"
  printf " %-20s %5s %7s %5sGB %7ss %6s/s %-7s %.50s\n" "$name" "$params" "$quant" "$size" "$secs" "$tps" "$q" "$output"
  echo "$name,$params,$quant,$size,$secs,$tps,$q" >> "$RES/results.csv"
}
# Dispatch on the first CLI argument (default: help).
case "${1:-help}" in
list)
# Print the model registry with a LOCAL/REMOTE/TOO BIG status per model.
echo ""
echo " INFERENCE-X MODEL HUB | $CPU | ${RAM_GB}GB | $CORES cores"
echo ""
printf " %-20s %5s %7s %6s %s\n" "MODEL" "PARAM" "QUANT" "SIZE" "STATUS"
echo " ════════════════════════════════════════════════════════════"
# Registry fields: name|hf_repo|filename|size_gb|params|quant
while IFS='|' read -r name repo fn size params quant; do
path=$(find_model "$fn" 2>/dev/null)
st="REMOTE"; [[ -n "$path" ]] && st="LOCAL"
# Integer part of size vs total RAM: a remote model larger than RAM is unrunnable.
sz=${size%.*}; [[ $sz -gt $RAM_GB ]] && [[ "$st" == "REMOTE" ]] && st="TOO BIG"
printf " %-20s %5s %7s %5sGB %s\n" "$name" "$params" "$quant" "$size" "$st"
done << 'REGISTRY'
smollm2-135m|HuggingFaceTB/SmolLM2-135M-Instruct-GGUF|smollm2-135m-instruct-q8_0.gguf|0.1|135M|Q8_0
llama-3.2-1b|bartowski/Llama-3.2-1B-Instruct-GGUF|Llama-3.2-1B-Instruct-Q4_K_M.gguf|0.8|1B|Q4_K_M
llama-3.2-3b|bartowski/Llama-3.2-3B-Instruct-GGUF|Llama-3.2-3B-Instruct-Q4_K_M.gguf|2.0|3B|Q4_K_M
qwen2.5-3b|Qwen/Qwen2.5-3B-Instruct-GGUF|qwen2.5-3b-instruct-q4_k_m.gguf|2.0|3B|Q4_K_M
phi-3.5-mini|bartowski/Phi-3.5-mini-instruct-GGUF|Phi-3.5-mini-instruct-Q4_K_M.gguf|2.3|3.8B|Q4_K_M
deepseek-r1-7b|bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF|DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf|4.7|7B|Q4_K_M
qwen2.5-7b|Qwen/Qwen2.5-7B-Instruct-GGUF|qwen2.5-7b-instruct-q4_k_m.gguf|4.7|7B|Q4_K_M
mistral-7b|bartowski/Mistral-7B-Instruct-v0.3-GGUF|Mistral-7B-Instruct-v0.3-Q4_K_M.gguf|4.4|7B|Q4_K_M
llama-3.1-8b|bartowski/Meta-Llama-3.1-8B-Instruct-GGUF|Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf|4.9|8B|Q4_K_M
gemma-2-9b|bartowski/gemma-2-9b-it-GGUF|gemma-2-9b-it-Q4_K_M.gguf|5.8|9B|Q4_K_M
deepseek-r1-14b|bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF|DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf|8.7|14B|Q4_K_M
qwen2.5-14b|Qwen/Qwen2.5-14B-Instruct-GGUF|qwen2.5-14b-instruct-q4_k_m.gguf|9.0|14B|Q4_K_M
qwen2.5-32b|Qwen/Qwen2.5-32B-Instruct-GGUF|qwen2.5-32b-instruct-q4_k_m.gguf|19.8|32B|Q4_K_M
deepseek-r1-32b|bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF|DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf|19.8|32B|Q4_K_M
llama-3.1-70b|bartowski/Meta-Llama-3.1-70B-Instruct-GGUF|Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf|42.5|70B|Q4_K_M
qwen2.5-72b|Qwen/Qwen2.5-72B-Instruct-GGUF|qwen2.5-72b-instruct-q4_k_m.gguf|44.0|72B|Q4_K_M
REGISTRY
echo ""
;;
pull)
# Download one model by name, or "all" models that fit in RAM, into $HUB.
name="${2:-}"
[[ -z "$name" ]] && echo "Usage: ix pull <model>" && exit 1
# Registry fields: name|hf_repo|filename|size_gb|params|quant
while IFS='|' read -r n repo fn size params quant; do
[[ "$n" != "$name" && "$name" != "all" ]] && continue
sz=${size%.*}
# In "all" mode, skip models whose size exceeds physical RAM.
[[ "$name" == "all" && $sz -gt $RAM_GB ]] && echo "SKIP $n (${size}GB > ${RAM_GB}GB)" && continue
path=$(find_model "$fn" 2>/dev/null)
[[ -n "$path" ]] && echo "$n: $path" && continue
echo "$n (${size}GB)..."
# -c resumes partial downloads. NOTE(review): a failed download leaves a
# partial file that find_model reports as LOCAL; re-run pull to resume it.
if wget -q --show-progress -c -O "$HUB/$fn" "https://huggingface.co/$repo/resolve/main/$fn"; then
echo "$n"
else
echo "$n FAILED"
fi
done << 'REGISTRY'
smollm2-135m|HuggingFaceTB/SmolLM2-135M-Instruct-GGUF|smollm2-135m-instruct-q8_0.gguf|0.1|135M|Q8_0
llama-3.2-1b|bartowski/Llama-3.2-1B-Instruct-GGUF|Llama-3.2-1B-Instruct-Q4_K_M.gguf|0.8|1B|Q4_K_M
llama-3.2-3b|bartowski/Llama-3.2-3B-Instruct-GGUF|Llama-3.2-3B-Instruct-Q4_K_M.gguf|2.0|3B|Q4_K_M
qwen2.5-3b|Qwen/Qwen2.5-3B-Instruct-GGUF|qwen2.5-3b-instruct-q4_k_m.gguf|2.0|3B|Q4_K_M
phi-3.5-mini|bartowski/Phi-3.5-mini-instruct-GGUF|Phi-3.5-mini-instruct-Q4_K_M.gguf|2.3|3.8B|Q4_K_M
deepseek-r1-7b|bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF|DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf|4.7|7B|Q4_K_M
qwen2.5-7b|Qwen/Qwen2.5-7B-Instruct-GGUF|qwen2.5-7b-instruct-q4_k_m.gguf|4.7|7B|Q4_K_M
mistral-7b|bartowski/Mistral-7B-Instruct-v0.3-GGUF|Mistral-7B-Instruct-v0.3-Q4_K_M.gguf|4.4|7B|Q4_K_M
llama-3.1-8b|bartowski/Meta-Llama-3.1-8B-Instruct-GGUF|Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf|4.9|8B|Q4_K_M
gemma-2-9b|bartowski/gemma-2-9b-it-GGUF|gemma-2-9b-it-Q4_K_M.gguf|5.8|9B|Q4_K_M
deepseek-r1-14b|bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF|DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf|8.7|14B|Q4_K_M
qwen2.5-14b|Qwen/Qwen2.5-14B-Instruct-GGUF|qwen2.5-14b-instruct-q4_k_m.gguf|9.0|14B|Q4_K_M
qwen2.5-32b|Qwen/Qwen2.5-32B-Instruct-GGUF|qwen2.5-32b-instruct-q4_k_m.gguf|19.8|32B|Q4_K_M
deepseek-r1-32b|bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF|DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf|19.8|32B|Q4_K_M
llama-3.1-70b|bartowski/Meta-Llama-3.1-70B-Instruct-GGUF|Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf|42.5|70B|Q4_K_M
qwen2.5-72b|Qwen/Qwen2.5-72B-Instruct-GGUF|qwen2.5-72b-instruct-q4_k_m.gguf|44.0|72B|Q4_K_M
REGISTRY
;;
bench)
# Benchmark one model (or the default "all" subset below) via bench_one.
target="${2:-all}"
# Tokens to generate per model (third CLI arg, default 4).
ntok="${3:-4}"
echo ""
echo "═══════════════════════════════════════════════════════════════"
echo " INFERENCE-X VALIDATION | $CPU | ${RAM_GB}GB | $CORES cores"
echo " $(date -u +%Y-%m-%dT%H:%M:%SZ) | $ntok tokens/model"
echo "═══════════════════════════════════════════════════════════════"
echo ""
printf " %-20s %5s %7s %6s %7s %8s %-7s %s\n" "MODEL" "PARAM" "QUANT" "SIZE" "TIME" "SPEED" "QUAL" "OUTPUT"
echo " ════════════════════════════════════════════════════════════════════════════════════"
# Fresh CSV; bench_one appends one row per model.
echo "model,params,quant,size_gb,time_s,tok_s,quality" > "$RES/results.csv"
# Registry fields: name|hf_repo|filename|size_gb|params|quant
while IFS='|' read -r name repo fn size params quant; do
[[ "$target" != "all" && "$target" != "$name" ]] && continue
bench_one "$name" "$fn" "$size" "$params" "$quant" "$ntok"
done << 'REGISTRY'
smollm2-135m|HuggingFaceTB/SmolLM2-135M-Instruct-GGUF|smollm2-135m-instruct-q8_0.gguf|0.1|135M|Q8_0
llama-3.2-3b|bartowski/Llama-3.2-3B-Instruct-GGUF|Llama-3.2-3B-Instruct-Q4_K_M.gguf|2.0|3B|Q4_K_M
phi-3.5-mini|bartowski/Phi-3.5-mini-instruct-GGUF|Phi-3.5-mini-instruct-Q4_K_M.gguf|2.3|3.8B|Q4_K_M
deepseek-r1-7b|bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF|DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf|4.7|7B|Q4_K_M
deepseek-r1-14b|bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF|DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf|8.7|14B|Q4_K_M
REGISTRY
echo ""
echo " Results: $RES/results.csv"
echo "═══════════════════════════════════════════════════════════════"
;;
serve)
# Start the IX HTTP server on a port (default 8080), with an explicit model
# path or an auto-selected local one.
port="${2:-8080}"
model="${3:-}"
if [[ -z "$model" ]]; then
# Auto-select best model that fits in RAM
# The registry below is ordered smallest-to-largest, and each local match
# overwrites $best, so the LAST hit — the largest fitting local model — wins.
best=""
while IFS='|' read -r n repo fn size params quant; do
sz=${size%.*}
[[ $sz -gt $RAM_GB ]] && continue
path=$(find_model "$fn" 2>/dev/null)
[[ -n "$path" ]] && best="$path"
done << 'REGISTRY'
smollm2-135m|HuggingFaceTB/SmolLM2-135M-Instruct-GGUF|smollm2-135m-instruct-q8_0.gguf|0.1|135M|Q8_0
llama-3.2-3b|bartowski/Llama-3.2-3B-Instruct-GGUF|Llama-3.2-3B-Instruct-Q4_K_M.gguf|2.0|3B|Q4_K_M
deepseek-r1-7b|bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF|DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf|4.7|7B|Q4_K_M
REGISTRY
[[ -z "$best" ]] && echo "No model found. Run: ix pull <model>" && exit 1
model="$best"
fi
echo "Starting IX server on port $port with $model"
"$IX" "$model" --serve "$port"
;;
*)
# Usage summary for unknown or missing subcommands.
# (Fixed: the original omitted the "serve" subcommand.)
echo " ix list | pull <model|all> | bench [model|all] [ntok] | serve [port] [model]"
;;
esac