forked from elmadani/ix-tools
- tools/organ.py: Pack/install/publish AI persona organs
- tools/forge.sh: Convert HuggingFace models to GGUF, quantize
- tools/store.sh: Browse/install/publish community models
- tools/compilation/: Cross-platform build scripts (Linux/macOS)
- scripts/install.sh: Universal installer (auto-detects OS/arch/GPU)
- site/saas/: SaaS frontend + backend source (v3 unified design)
- site/vitrine/: Main site source (inference-x.com)
- docs/ARCHITECTURE.md: Full system architecture

All plans are currently in test mode; Studio & Enterprise are free to test.
Branch: master | Maintainer: Anti-Atlas craton (elmadani)
#!/bin/bash
# IX Forge — Model conversion and quantization pipeline
# Usage: ./forge.sh <command> [options]
# Commands: convert, quantize, package, benchmark

set -e

IX_FORGE_VER="1.0.0"
LLAMA_CPP_DIR="${IX_LLAMA_CPP:-$HOME/.inference-x/llama.cpp}"
OUTPUT_DIR="${IX_OUTPUT:-./forge-output}"
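
# Both defaults above can be overridden per run via the IX_LLAMA_CPP and
# IX_OUTPUT environment variables, e.g. (illustrative paths):
#   IX_LLAMA_CPP=/opt/llama.cpp IX_OUTPUT=/tmp/forge ./forge.sh convert --source ./mymodel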

log() { echo -e "\033[0;36m[IX-FORGE]\033[0m $1"; }
ok()  { echo -e "\033[0;32m[✓]\033[0m $1"; }
err() { echo -e "\033[0;31m[✗]\033[0m $1"; exit 1; }

usage() {
  cat << 'USAGE'
IX Forge v1.0 — Model conversion and quantization

USAGE:
  ./forge.sh convert --source <hf_model_dir> --output <name.gguf>
  ./forge.sh quantize --input <model.gguf> --quant Q4_K_M --output <name_q4.gguf>
  ./forge.sh package --model <model.gguf> --name "ModelName" --version 1.0
  ./forge.sh benchmark --model <model.gguf> --prompt "Hello" --runs 10

QUANTIZATION LEVELS:
  Q2_K   — Smallest (~2.6 bits/weight; largest quality loss)
  Q4_0   — Small (faster, less accurate)
  Q4_K_M — RECOMMENDED (best size/quality balance)
  Q5_K_M — High quality
  Q6_K   — Near-lossless
  Q8_0   — Near-perfect
  F16    — Unquantized 16-bit (about 2x the size of Q8_0)

EXAMPLES:
  # Convert Mistral 7B from HuggingFace
  ./forge.sh convert --source ./mistral-7b-v0.1 --output mistral-7b.gguf

  # Quantize to Q4_K_M
  ./forge.sh quantize --input mistral-7b.gguf --quant Q4_K_M --output mistral-7b-q4.gguf

  # Full pipeline (convert writes into ./forge-output by default)
  ./forge.sh convert --source ./mymodel && ./forge.sh quantize --input ./forge-output/mymodel.gguf --quant Q4_K_M
USAGE
}
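
# Back-of-envelope sizes for the table above, for a 7B-parameter model
# (approximate; real GGUF files vary with architecture and metadata):
#   F16    ≈ 7e9 × 2.00 bytes/weight ≈ 14.0 GB
#   Q8_0   ≈ 7e9 × 1.06 bytes/weight ≈  7.4 GB
#   Q4_K_M ≈ 7e9 × 0.61 bytes/weight ≈  4.2 GB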

# Clone and build llama.cpp on first use (provides the conversion script
# and the llama-quantize binary).
check_llama_cpp() {
  if [ ! -f "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" ]; then
    log "llama.cpp not found at $LLAMA_CPP_DIR"
    log "Installing..."
    mkdir -p "$LLAMA_CPP_DIR"
    git clone --depth=1 https://github.com/ggerganov/llama.cpp.git "$LLAMA_CPP_DIR" 2>&1 | tail -3
    cd "$LLAMA_CPP_DIR" && cmake -B build -DLLAMA_BUILD_SERVER=OFF && cmake --build build -j4 2>&1 | tail -5
    cd -
    ok "llama.cpp installed"
  fi
}
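
# Note (assumption): convert_hf_to_gguf.py has Python dependencies of its own
# (torch, transformers, sentencepiece, gguf, ...). If conversion fails on
# imports, installing llama.cpp's pinned requirements usually resolves it:
#   pip install -r "$LLAMA_CPP_DIR/requirements.txt"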

cmd_convert() {
  local source="" output=""
  while [[ $# -gt 0 ]]; do
    case $1 in
      --source) source="$2"; shift ;;
      --output) output="$2"; shift ;;
    esac; shift
  done
  [ -z "$source" ] && err "Missing --source"
  [ -z "$output" ] && output="$(basename "$source").gguf"
  check_llama_cpp
  mkdir -p "$OUTPUT_DIR"
  log "Converting $source → $OUTPUT_DIR/$output"
  python3 "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" "$source" --outtype f16 --outfile "$OUTPUT_DIR/$output"
  ok "Converted: $OUTPUT_DIR/$output ($(du -sh "$OUTPUT_DIR/$output" | cut -f1))"
}
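
# convert always emits an f16 GGUF as the high-fidelity intermediate, and the
# file lands in $OUTPUT_DIR, not the current directory. Typical chain:
#   ./forge.sh convert --source ./mistral-7b-v0.1
#   ./forge.sh quantize --input ./forge-output/mistral-7b-v0.1.gguf --quant Q4_K_M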

cmd_quantize() {
  local input="" quant="Q4_K_M" output=""
  while [[ $# -gt 0 ]]; do
    case $1 in
      --input) input="$2"; shift ;;
      --quant) quant="$2"; shift ;;
      --output) output="$2"; shift ;;
    esac; shift
  done
  [ -z "$input" ] && err "Missing --input"
  [ -z "$output" ] && output="${input%.gguf}_${quant}.gguf"
  check_llama_cpp
  log "Quantizing $input → $output (${quant})"
  "$LLAMA_CPP_DIR/build/bin/llama-quantize" "$input" "$output" "$quant"
  ok "Quantized: $output ($(du -sh "$output" | cut -f1))"
}
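
# Quick smoke test of a quantized model (a sketch; assumes the default cmake
# build above also produced llama-cli, which it does unless tools were disabled):
#   "$LLAMA_CPP_DIR/build/bin/llama-cli" -m mistral-7b-v0.1_Q4_K_M.gguf -p "Hello" -n 16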

cmd_package() {
  local model="" name="" version="1.0"
  while [[ $# -gt 0 ]]; do
    case $1 in
      --model) model="$2"; shift ;;
      --name) name="$2"; shift ;;
      --version) version="$2"; shift ;;
    esac; shift
  done
  [ -z "$model" ] && err "Missing --model"
  [ -z "$name" ] && name="$(basename "$model" .gguf)"
  local pkg_dir="$OUTPUT_DIR/pkg-$name-$version"
  mkdir -p "$pkg_dir"
  cp "$model" "$pkg_dir/"
  local size=$(wc -c < "$model")
  # Store the full 64-hex-char digest so consumers can actually verify it.
  local sha=$(sha256sum "$model" | cut -d' ' -f1)
  cat > "$pkg_dir/manifest.json" << MANIFEST
{
  "name": "$name",
  "version": "$version",
  "model_file": "$(basename "$model")",
  "size_bytes": $size,
  "sha256": "$sha",
  "format": "gguf",
  "ix_compatible": true,
  "created_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
MANIFEST
  tar -czf "$OUTPUT_DIR/$name-$version.ix-package" -C "$OUTPUT_DIR" "pkg-$name-$version"
  rm -rf "$pkg_dir"
  ok "Packaged: $OUTPUT_DIR/$name-$version.ix-package"
}
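
# Verifying a package by hand (illustrative; paths are examples):
#   tar -tzf forge-output/MyModel-1.0.ix-package          # list contents
#   tar -xzf forge-output/MyModel-1.0.ix-package -C /tmp  # unpack
#   sha256sum /tmp/pkg-MyModel-1.0/*.gguf                 # compare with manifest.json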

# Measures end-to-end completion latency against an already-running,
# OpenAI-compatible server on localhost:8080; the --model flag is only
# used to label the output.
cmd_benchmark() {
  local model="" prompt="Hello, how are you?" runs=5
  while [[ $# -gt 0 ]]; do
    case $1 in
      --model) model="$2"; shift ;;
      --prompt) prompt="$2"; shift ;;
      --runs) runs="$2"; shift ;;
    esac; shift
  done
  [ -z "$model" ] && err "Missing --model"
  log "Benchmarking $model ($runs runs)"
  log "Prompt: $prompt"
  local total=0
  for i in $(seq 1 $runs); do
    # %N (nanoseconds) requires GNU date; on macOS use coreutils' gdate.
    local start=$(date +%s%N)
    curl -s -X POST http://localhost:8080/v1/completions \
      -H "Content-Type: application/json" \
      -d "{\"prompt\":\"$prompt\",\"max_tokens\":50}" > /dev/null
    local end=$(date +%s%N)
    local ms=$(( (end - start) / 1000000 ))
    log "Run $i: ${ms}ms"
    total=$((total + ms))
  done
  local avg=$((total / runs))
  ok "Average latency: ${avg}ms over $runs runs"
}
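
# Standing up a matching server first (a sketch; note the bootstrap above
# configures cmake with LLAMA_BUILD_SERVER=OFF, so build llama-server
# separately or flip that flag):
#   "$LLAMA_CPP_DIR/build/bin/llama-server" -m forge-output/mistral-7b-q4.gguf --port 8080 &
#   ./forge.sh benchmark --model mistral-7b-q4.gguf --runs 10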

case "${1:-help}" in
  convert)   shift; cmd_convert "$@" ;;
  quantize)  shift; cmd_quantize "$@" ;;
  package)   shift; cmd_package "$@" ;;
  benchmark) shift; cmd_benchmark "$@" ;;
  *) usage ;;
esac