# Inference-X: better output from the same model. Fused computation, adaptive precision, surgical expert loading. 305 KB, 19 backends, zero dependencies. https://inference-x.com
# File: Makefile
# ═══════════════════════════════════════════════════════════════════════════════
# INFERENCE-X UNIFIED — MAKEFILE
# One binary. All silicon. Hardware decides, not code.
# ═══════════════════════════════════════════════════════════════════════════════
# COPYRIGHT (C) 2025-2026 SALKA ELMADANI — ALL RIGHTS RESERVED
# Morocco
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
# Toolchain selection.
# NOTE: GNU Make predefines CC (as `cc`) and CXX (as `g++`), so these `?=`
# fallbacks only take effect when the built-in variables are disabled
# (e.g. `make -R`). Override explicitly with `make CC=... CXX=...`.
CC  ?= gcc
CXX ?= g++

# C++ compile flags: language level and optimisation, then include path and
# OpenMP, then warning policy. Later stanzas append ISA and -DIX_* switches.
CXXFLAGS  = -std=c++17 -O3 -DNDEBUG
CXXFLAGS += -I. -fopenmp
CXXFLAGS += -Wall -Wno-unused-result

# C compile flags used for the .c backends.
CFLAGS  = -O3 -DNDEBUG
CFLAGS += -I.

# Link flags: OpenMP runtime plus pthread and libm.
LDFLAGS  = -fopenmp
LDFLAGS += -lpthread -lm

# Name of the executable produced by the build
TARGET = inference-x

# Backend object list; starts empty and is appended to by the SDK detection
# stanzas further down.
BACKEND_OBJS =
|
|
|
|
# ─── AUTO-DETECT PLATFORM ─────────────────────────────────────────────────────
# Machine hardware name as reported by `uname -m` (x86_64, aarch64, armv7l, ...).
ARCH := $(shell uname -m)

ifeq ($(ARCH),x86_64)
  # Probe the compiler that actually builds the sources ($(CXX)) instead of a
  # hard-coded `gcc`: dump the macros predefined under -march=native and count
  # the lines mentioning __AVX512F__. grep -c prints 1 when the macro is
  # defined and 0 otherwise (also 0 when the probe fails; stderr is discarded).
  HAS_AVX512 := $(shell $(CXX) -march=native -x c++ -dM -E - < /dev/null 2>/dev/null | grep -c '__AVX512F__')
  ifeq ($(HAS_AVX512),1)
    CXXFLAGS += -mavx512f -mavx512bw -mavx512vl -mfma -DIX_HAS_AVX512
    $(info [IX] Detected AVX-512 → CPU_AVX512 backend)
  else
    # Only -march=native is added; -fopenmp is already in the base CXXFLAGS.
    CXXFLAGS += -march=native
    # NOTE(review): this branch is taken whenever AVX-512F is absent; AVX2
    # itself is never probed, so the message below is an assumption.
    $(info [IX] Detected AVX2 → GENERIC backend)
  endif
endif
|
|
|
|
# 64-bit ARM: only the IX_HAS_NEON define is added, no extra machine flags.
ifeq (aarch64,$(ARCH))
  $(info [IX] Detected ARM64 → ARM_NEON backend)
  CXXFLAGS += -DIX_HAS_NEON
endif

# 32-bit ARM: NEON is requested from the compiler with -mfpu=neon.
# NOTE(review): unlike the aarch64 case, IX_HAS_NEON is not defined here;
# confirm whether the sources rely on that macro on ARM32.
ifeq (armv7l,$(ARCH))
  $(info [IX] Detected ARM32 → ARM_NEON backend)
  CXXFLAGS += -mfpu=neon
endif
|
|
|
|
# ─── AUTO-DETECT ACCELERATOR SDKs ─────────────────────────────────────────────
# Each SDK detection:
#   1. Checks for the SDK's header or tool
#   2. Sets the IX_USE_* define
#   3. Adds the backend object (.o, built from the backend's .c/.cpp) to BACKEND_OBJS
#   4. Adds SDK-specific link flags, where the SDK needs any
#
# Without SDK → nothing happens. Zero noise.
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
# CPU AVX-512 backend.
# Keyed on HAS_AVX512, which is only assigned on x86_64 hosts; on any other
# architecture the variable is empty and this stanza is skipped.
ifeq (1,$(HAS_AVX512))
  $(info [IX] → CPU AVX-512 backend: ENABLED)
  BACKEND_OBJS += backends/q4_kernels/cpu/q4_gemm_cpu.o
  # The C compiler gets the AVX-512 machine flags here, next to its define.
  CFLAGS       += -mavx512f -mavx512bw -mavx512vl -mfma -DIX_USE_CPU_AVX512
  CXXFLAGS     += -DIX_USE_CPU_AVX512
endif
|
|
|
|
# Qualcomm Hexagon SDK
# Enabled when HEXAGON_SDK_ROOT is set and contains libs/common/qurt/ADSPv*.
# The $(if ...) guard keeps an unset or empty HEXAGON_SDK_ROOT from turning the
# probe into the absolute path /libs/common/qurt/ADSPv*.
ifneq ($(if $(strip $(HEXAGON_SDK_ROOT)),$(wildcard $(HEXAGON_SDK_ROOT)/libs/common/qurt/ADSPv*)),)
  CXXFLAGS     += -DIX_USE_HEXAGON
  CFLAGS       += -DIX_USE_HEXAGON
  BACKEND_OBJS += backends/q4_kernels/hexagon/q4_gemm_hexagon.o
  LDFLAGS      += -L$(HEXAGON_SDK_ROOT)/libs -lhexagon_nn
  $(info [IX] → Hexagon HVX backend: ENABLED)
endif
|
|
|
|
# Qualcomm Snapdragon (Android NDK + Hexagon)
# Enabled when ANDROID_NDK is set and ships an LLVM clang++ under
# toolchains/llvm/prebuilt/<host>/bin. The $(if ...) guard keeps an unset or
# empty ANDROID_NDK from probing /toolchains/... at the filesystem root.
# No link flags are added for this backend.
ifneq ($(if $(strip $(ANDROID_NDK)),$(wildcard $(ANDROID_NDK)/toolchains/llvm/prebuilt/*/bin/clang++)),)
  CXXFLAGS     += -DIX_USE_SNAPDRAGON
  BACKEND_OBJS += backends/q4_kernels/snapdragon/q4_gemm_snapdragon_70b.o
  $(info [IX] → Snapdragon Hybrid backend: ENABLED)
endif
|
|
|
|
# Cerebras SDK
# Enabled when CEREBRASESDK is set and provides headers under include/cerebras.
# The $(if ...) guard keeps an unset or empty CEREBRASESDK from probing
# /include/cerebras at the filesystem root.
ifneq ($(if $(strip $(CEREBRASESDK)),$(wildcard $(CEREBRASESDK)/include/cerebras/*.h)),)
  CXXFLAGS     += -DIX_USE_CEREBRAS
  CFLAGS       += -DIX_USE_CEREBRAS
  BACKEND_OBJS += backends/q4_kernels/cerebras/q4_gemm_wse.o
  LDFLAGS      += -L$(CEREBRASESDK)/lib -lcerebras_runtime
  $(info [IX] → Cerebras WSE backend: ENABLED)
endif
|
|
|
|
# Groq SDK
# Probe: the Groq runtime header at its system-wide install location.
ifneq (,$(wildcard /usr/include/groq/groq_runtime.h))
  $(info [IX] → Groq LPU backend: ENABLED)
  BACKEND_OBJS += backends/q4_kernels/groq/q4_gemm_groq_lpu.o
  LDFLAGS      += -lgroq_runtime
  # Both compilers receive the feature define.
  CFLAGS       += -DIX_USE_GROQ
  CXXFLAGS     += -DIX_USE_GROQ
endif
|
|
|
|
# Intel Gaudi (Habana Synapse)
# Probe: the Synapse API header under /usr/include.
ifneq (,$(wildcard /usr/include/synapse_api.h))
  $(info [IX] → Gaudi Habana backend: ENABLED)
  BACKEND_OBJS += backends/q4_kernels/gaudi/q4_gemm_gaudi.o
  LDFLAGS      += -lSynapse
  CXXFLAGS     += -DIX_USE_GAUDI
endif
|
|
|
|
# AWS Inferentia (Neuron SDK)
# Probe: the Neuron runtime header under /opt/aws/neuron; the matching library
# directory is added to the link search path.
ifneq (,$(wildcard /opt/aws/neuron/include/nrt/nrt.h))
  $(info [IX] → AWS Inferentia backend: ENABLED)
  BACKEND_OBJS += backends/q4_kernels/inferentia/q4_gemm_inferentia.o
  LDFLAGS      += -L/opt/aws/neuron/lib -lnrt
  CXXFLAGS     += -DIX_USE_INFERENTIA
endif
|
|
|
|
# Xilinx FPGA (Vitis)
# Enabled when XILINX_VITIS is set and provides include/ap_int.h.
# The $(if ...) guard keeps an unset or empty XILINX_VITIS from probing
# /include/ap_int.h at the filesystem root.
ifneq ($(if $(strip $(XILINX_VITIS)),$(wildcard $(XILINX_VITIS)/include/ap_int.h)),)
  CXXFLAGS     += -DIX_USE_FPGA_XILINX
  BACKEND_OBJS += backends/q4_kernels/fpga_xilinx/q4_gemm_fpga_xilinx.o
  LDFLAGS      += -L$(XILINX_VITIS)/lib -lxrt_core
  $(info [IX] → Xilinx FPGA backend: ENABLED)
endif
|
|
|
|
# Graphcore IPU (Poplar SDK)
# Enabled when POPLAR_SDK is set and provides include/poplar/Engine.hpp.
# The $(if ...) guard keeps an unset or empty POPLAR_SDK from probing
# /include/poplar/Engine.hpp at the filesystem root.
ifneq ($(if $(strip $(POPLAR_SDK)),$(wildcard $(POPLAR_SDK)/include/poplar/Engine.hpp)),)
  CXXFLAGS     += -DIX_USE_GRAPHCORE
  BACKEND_OBJS += backends/q4_kernels/graphcore/q4_gemm_ipu.o
  LDFLAGS      += -L$(POPLAR_SDK)/lib -lpoplar -lpoplin
  $(info [IX] → Graphcore IPU backend: ENABLED)
endif
|
|
|
|
# SambaNova RDU
# Enabled when SAMBANOVA_SDK is set and provides headers under include/samba.
# The $(if ...) guard keeps an unset or empty SAMBANOVA_SDK from probing
# /include/samba at the filesystem root.
ifneq ($(if $(strip $(SAMBANOVA_SDK)),$(wildcard $(SAMBANOVA_SDK)/include/samba/*.h)),)
  CXXFLAGS     += -DIX_USE_SAMBANOVA
  BACKEND_OBJS += backends/q4_kernels/sambanova/q4_gemm_sambanova.o
  LDFLAGS      += -L$(SAMBANOVA_SDK)/lib -lsamba_runtime
  $(info [IX] → SambaNova RDU backend: ENABLED)
endif
|
|
|
|
# Microsoft Maia
# Probe: the Maia runtime header under /usr/include.
ifneq (,$(wildcard /usr/include/maia_runtime.h))
  $(info [IX] → Microsoft Maia backend: ENABLED)
  BACKEND_OBJS += backends/q4_kernels/maia/q4_gemm_maia.o
  LDFLAGS      += -lmaia_runtime
  CXXFLAGS     += -DIX_USE_MAIA
endif
|
|
|
|
# ─── BUILD RULES ───────────────────────────────────────────────────────────────
# The single C++ source that is compiled and linked into $(TARGET).
SRC = infer.cpp

# Goals that name commands rather than files. debug, bench and the run-* goals
# are listed too, so a stray file with one of those names cannot make the goal
# look up to date.
.PHONY: all clean info debug bench run-kimi run-ds7b

# Default goal: build the binary, then print a short usage banner.
all: $(TARGET)
	@echo ""
	@echo "╔══════════════════════════════════════════════════════════════╗"
	@echo "║ Inference-X Unified — Build Complete ║"
	@echo "║ Binary: ./$(TARGET) ║"
	@echo "║ Usage: ./$(TARGET) <model_path> -p 'prompt' -n 512 ║"
	@echo "╚══════════════════════════════════════════════════════════════╝"
|
|
|
|
# Compile the main source ($<) and link it with every enabled backend object
# in a single compiler invocation; libraries come last on the command line.
$(TARGET): $(SRC) $(BACKEND_OBJS)
	$(CXX) $(CXXFLAGS) $< $(BACKEND_OBJS) -o $@ $(LDFLAGS)
|
|
|
|
# ── Backend compilation rules ─────────────────────────────────────────────────
# Backends written in C (CPU, Cerebras, Groq, Hexagon): built with $(CC) and
# $(CFLAGS), one object per source file under backends/q4_kernels/.
backends/q4_kernels/%.o: backends/q4_kernels/%.c
	$(CC) $(CFLAGS) -c $< -o $@
|
|
|
|
# Backends written in C++ (everything else): built with $(CXX) and $(CXXFLAGS),
# one object per source file under backends/q4_kernels/.
backends/q4_kernels/%.o: backends/q4_kernels/%.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
|
|
# ─── CONVENIENCE TARGETS ──────────────────────────────────────────────────────
# debug: unoptimised AddressSanitizer build.
# Start from the fully detected CXXFLAGS (OpenMP, ISA switches, -DIX_* backend
# defines) and swap only the optimisation settings. Resetting the variable from
# scratch would drop those flags while $(BACKEND_OBJS) is still linked in.
# `:=` expands here, after all detection stanzas above have run.
debug: CXXFLAGS := $(filter-out -O3 -DNDEBUG,$(CXXFLAGS)) -g -O0 -fsanitize=address
debug: LDFLAGS += -fsanitize=address
debug: $(TARGET)

# bench: release build with host-tuned code generation on top of the defaults.
bench: CXXFLAGS += -O3 -march=native -DNDEBUG
bench: $(TARGET)
|
|
|
|
# Model locations for the run-* goals. The defaults are the original hard-coded
# paths; override per invocation, e.g. `make run-ds7b DS7B_MODEL=/path/model.gguf`.
KIMI_MODEL ?= /mnt/data/models/kimi-k2.5/UD-TQ1_0
DS7B_MODEL ?= /mnt/data/winwin_ai/models/gguf/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf

# Run with Kimi K2.5 (VPS default)
run-kimi: $(TARGET)
	./$(TARGET) "$(KIMI_MODEL)" -p "Hello" -n 10 -t 0.6

# Run with DeepSeek R1 7B
run-ds7b: $(TARGET)
	./$(TARGET) "$(DS7B_MODEL)" \
		-p "Explain quantum computing" -n 256 -t 0.7
|
|
|
|
# Remove the binary and the objects of the backends enabled in this
# configuration. $(RM) is Make's predefined `rm -f`.
clean:
	$(RM) $(TARGET) $(BACKEND_OBJS)
|
|
|
|
# ─── INFO ──────────────────────────────────────────────────────────────────────
# Print the detected configuration, then list the C/C++ sources and headers
# below the current directory and their combined line count.
info:
	@echo "Architecture: $(ARCH)"
	@echo "Compiler: $(CXX)"
	@echo "Flags: $(CXXFLAGS)"
	@echo "Backends: $(if $(BACKEND_OBJS),$(BACKEND_OBJS),generic only)"
	@echo ""
	@echo "Source tree:"
	@find . -type f \( -name '*.h' -o -name '*.cpp' -o -name '*.c' \) | sort
	@echo ""
	@# Concatenate through `find -exec ... +` rather than `xargs wc -l | tail -1`:
	@# it is safe for paths containing whitespace, and the total stays correct
	@# when the file list is too long for one command line.
	@find . -type f \( -name '*.h' -o -name '*.cpp' -o -name '*.c' \) -exec cat {} + | wc -l | awk '{ print $$1 " total" }'
|