ix-tools/tools/forge.sh
SALKA 5b3d4e0e1d feat: Complete toolchain - organ, forge, store, compilation, installer, site source, architecture docs
- tools/organ.py: Pack/install/publish AI persona organs
- tools/forge.sh: Convert HuggingFace models to GGUF, quantize
- tools/store.sh: Browse/install/publish community models
- tools/compilation/: Cross-platform build scripts (Linux/macOS)
- scripts/install.sh: Universal installer (auto-detect OS/arch/GPU)
- site/saas/: SaaS frontend + backend source (v3 unified design)
- site/vitrine/: Main site source (inference-x.com)
- docs/ARCHITECTURE.md: Full system architecture

All plans now in test mode - Studio & Enterprise free to test.
Branch: master | Maintainer: Anti-Atlas craton (elmadani)
2026-02-24 20:39:52 +00:00

#!/bin/bash
# IX Forge — Model conversion and quantization pipeline
# Usage: ./forge.sh <command> [options]
# Commands: convert, quantize, package, benchmark
set -eo pipefail  # pipefail: don't let "cmd 2>&1 | tail" pipelines mask build failures
IX_FORGE_VER="1.0.0"
LLAMA_CPP_DIR="${IX_LLAMA_CPP:-$HOME/.inference-x/llama.cpp}"
OUTPUT_DIR="${IX_OUTPUT:-./forge-output}"
log() { echo -e "\033[0;36m[IX-FORGE]\033[0m $1"; }
ok() { echo -e "\033[0;32m[✓]\033[0m $1"; }
err() { echo -e "\033[0;31m[✗]\033[0m $1" >&2; exit 1; }
usage() {
  cat << 'USAGE'
IX Forge v1.0.0 — Model conversion and quantization

USAGE:
  ./forge.sh convert   --source <hf_model_dir> --output <name.gguf>
  ./forge.sh quantize  --input <model.gguf> --quant Q4_K_M --output <name_q4.gguf>
  ./forge.sh package   --model <model.gguf> --name "ModelName" --version 1.0
  ./forge.sh benchmark --model <model.gguf> --prompt "Hello" --runs 10

QUANTIZATION LEVELS:
  Q2_K   — Smallest (heaviest compression, significant quality loss)
  Q4_0   — Legacy 4-bit (small, less accurate than Q4_K_M)
  Q4_K_M — RECOMMENDED (best size/quality balance)
  Q5_K_M — High quality
  Q6_K   — Near-lossless
  Q8_0   — Near-perfect
  F16    — 16-bit floats (unquantized, largest)
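
SIZE RULE OF THUMB (approximate):
  file size ≈ parameter count × bits per weight / 8
  e.g. a 7B model is ~14 GB at F16 and roughly 4-5 GB at Q4_K_M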

EXAMPLES:
  # Convert Mistral 7B from HuggingFace
  ./forge.sh convert --source ./mistral-7b-v0.1 --output mistral-7b.gguf

  # Quantize to Q4_K_M (convert writes into ./forge-output by default)
  ./forge.sh quantize --input forge-output/mistral-7b.gguf --quant Q4_K_M --output mistral-7b-q4.gguf

  # Full pipeline
  ./forge.sh convert --source ./mymodel && ./forge.sh quantize --input forge-output/mymodel.gguf --quant Q4_K_M
USAGE
}
check_llama_cpp() {
  if [ ! -f "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" ]; then
    log "llama.cpp not found at $LLAMA_CPP_DIR"
    log "Installing..."
    mkdir -p "$LLAMA_CPP_DIR"
    git clone --depth=1 https://github.com/ggerganov/llama.cpp.git "$LLAMA_CPP_DIR" 2>&1 | tail -3
    # Build in a subshell so we never get stranded in $LLAMA_CPP_DIR on failure
    (cd "$LLAMA_CPP_DIR" && cmake -B build -DLLAMA_BUILD_SERVER=OFF && cmake --build build -j4 2>&1 | tail -5)
    ok "llama.cpp installed"
  fi
}
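# convert_hf_to_gguf.py needs llama.cpp's Python dependencies; if the convert
# step fails on imports, `pip install -r "$LLAMA_CPP_DIR/requirements.txt"` is
# the usual fix. An existing checkout/build can also be reused by pointing
# IX_LLAMA_CPP at it, e.g.:
#   IX_LLAMA_CPP=~/src/llama.cpp ./forge.sh convert --source ./mymodel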
cmd_convert() {
  local source="" output=""
  while [[ $# -gt 0 ]]; do
    case $1 in
      --source) source="$2"; shift ;;
      --output) output="$2"; shift ;;
    esac; shift
  done
  [ -z "$source" ] && err "Missing --source"
  [ -z "$output" ] && output="$(basename "$source").gguf"
  check_llama_cpp
  mkdir -p "$OUTPUT_DIR"
  log "Converting $source$OUTPUT_DIR/$output"
  python3 "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" "$source" --outtype f16 --outfile "$OUTPUT_DIR/$output"
  ok "Converted: $OUTPUT_DIR/$output ($(du -sh "$OUTPUT_DIR/$output" | cut -f1))"
}
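# Note: conversion always emits a full-size f16 GGUF (--outtype f16 above);
# run `./forge.sh quantize` on the result to shrink it for deployment.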
cmd_quantize() {
  local input="" quant="Q4_K_M" output=""
  while [[ $# -gt 0 ]]; do
    case $1 in
      --input) input="$2"; shift ;;
      --quant) quant="$2"; shift ;;
      --output) output="$2"; shift ;;
    esac; shift
  done
  [ -z "$input" ] && err "Missing --input"
  [ -z "$output" ] && output="${input%.gguf}_${quant}.gguf"
  check_llama_cpp
  log "Quantizing $input$output (${quant})"
  "$LLAMA_CPP_DIR/build/bin/llama-quantize" "$input" "$output" "$quant"
  ok "Quantized: $output ($(du -sh "$output" | cut -f1))"
}
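# Optional smoke test of a quantized file with llama.cpp's own CLI (path
# assumes the build produced by check_llama_cpp):
#   "$LLAMA_CPP_DIR/build/bin/llama-cli" -m forge-output/mymodel_Q4_K_M.gguf -p "Hello" -n 32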
cmd_package() {
  local model="" name="" version="1.0"
  while [[ $# -gt 0 ]]; do
    case $1 in
      --model) model="$2"; shift ;;
      --name) name="$2"; shift ;;
      --version) version="$2"; shift ;;
    esac; shift
  done
  [ -z "$model" ] && err "Missing --model"
  [ -z "$name" ] && name="$(basename "$model" .gguf)"
  local pkg_dir="$OUTPUT_DIR/pkg-$name-$version"
  mkdir -p "$pkg_dir"
  cp "$model" "$pkg_dir/"
  local size=$(wc -c < "$model")
  # Keep the full 64-hex-char digest; a truncated hash can't verify integrity
  local sha=$(sha256sum "$model" | awk '{print $1}')
  cat > "$pkg_dir/manifest.json" << MANIFEST
{
  "name": "$name",
  "version": "$version",
  "model_file": "$(basename "$model")",
  "size_bytes": $size,
  "sha256": "$sha",
  "format": "gguf",
  "ix_compatible": true,
  "created_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
MANIFEST
  tar -czf "$OUTPUT_DIR/$name-$version.ix-package" -C "$OUTPUT_DIR" "pkg-$name-$version"
  rm -rf "$pkg_dir"
  ok "Packaged: $OUTPUT_DIR/$name-$version.ix-package"
}
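# An .ix-package is a plain gzipped tarball, so standard tools can inspect it:
#   tar -tzf forge-output/ModelName-1.0.ix-package          # list contents
#   tar -xzf forge-output/ModelName-1.0.ix-package -C /tmp  # unpack elsewhere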
cmd_benchmark() {
  local model="" prompt="Hello, how are you?" runs=5
  while [[ $# -gt 0 ]]; do
    case $1 in
      --model) model="$2"; shift ;;
      --prompt) prompt="$2"; shift ;;
      --runs) runs="$2"; shift ;;
    esac; shift
  done
  [ -z "$model" ] && err "Missing --model"
  log "Benchmarking $model ($runs runs)"
  log "Prompt: $prompt"
  local total=0
  for i in $(seq 1 "$runs"); do
    local start=$(date +%s%N)
    # --fail makes curl exit non-zero on HTTP errors, so set -e aborts instead
    # of silently timing error responses. Caveat: $prompt is interpolated into
    # raw JSON, so prompts containing double quotes will break the request body.
    curl -s --fail -X POST http://localhost:8080/v1/completions \
      -H "Content-Type: application/json" \
      -d "{\"prompt\":\"$prompt\",\"max_tokens\":50}" > /dev/null
    local end=$(date +%s%N)
    local ms=$(( (end - start) / 1000000 ))
    log "Run $i: ${ms}ms"
    total=$((total + ms))
  done
  local avg=$((total / runs))
  ok "Average latency: ${avg}ms over $runs runs"
}
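# Note: cmd_benchmark does not load --model itself; it assumes an
# OpenAI-compatible completion endpoint is already serving that model on
# localhost:8080. The llama.cpp build above uses -DLLAMA_BUILD_SERVER=OFF,
# so the server has to come from elsewhere (e.g. an already-running IX
# runtime or a separate llama-server build).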
case "${1:-help}" in
convert) shift; cmd_convert "$@" ;;
quantize) shift; cmd_quantize "$@" ;;
package) shift; cmd_package "$@" ;;
benchmark) shift; cmd_benchmark "$@" ;;
*) usage ;;
esac