# inference-x/Makefile
# Salka Elmadani ec36668cf5 Inference-X v1.0 — Universal AI Inference Engine
# Better output from the same model. Fused computation, adaptive precision,
# surgical expert loading. 305 KB, 19 backends, zero dependencies.
#
# https://inference-x.com
# 2026-02-23 07:10:47 +00:00
#
# 201 lines
# 8.4 KiB
# Makefile

# ══════════════════════════════════════════════════════════════════════════════
# INFERENCE-X UNIFIED — MAKEFILE
# One binary. All silicon. Hardware decides, not code.
# ═══════════════════════════════════════════════════════════════════════════════
# COPYRIGHT (C) 2025-2026 SALKA ELMADANI — ALL RIGHTS RESERVED
# Morocco
# ═══════════════════════════════════════════════════════════════════════════════
# ─── TOOLCHAIN & BASE FLAGS ───────────────────────────────────────────────────
# Compilers. `?=` only assigns when the variable is not already defined.
# Note: GNU Make normally ships built-in defaults for CC and CXX, in which case
# these two lines leave that default (or any user-supplied value) in place.
CC ?= gcc
CXX ?= g++
# Name of the executable produced by the link rule.
TARGET = inference-x
# Objects of the optional backends; starts empty and is appended to by the
# detection blocks that follow.
BACKEND_OBJS =
# C++ compile flags: C++17, optimised release build, OpenMP, warnings on
# (unused-result warnings silenced).
CXXFLAGS = -std=c++17 -O3 -DNDEBUG -I. \
           -fopenmp -Wall -Wno-unused-result
# C compile flags used for the .c backends.
CFLAGS = -O3 -DNDEBUG -I.
# Link flags; also carries the -l libraries, so it must come after the inputs
# on the link command line.
LDFLAGS = -fopenmp -lpthread -lm
# ─── AUTO-DETECT PLATFORM ─────────────────────────────────────────────────────
# Picks CPU feature flags from the machine name reported by `uname -m`.
# Architectures other than x86_64 / aarch64 / armv7l get no extra flags.
ARCH := $(shell uname -m)

ifeq ($(ARCH),x86_64)
# Ask the compiler that will actually build the code ($(CXX), not a hard-coded
# gcc) which macros -march=native predefines. The pattern is the exact macro
# name __AVX512F__: a bare "AVX512F" would also match __AVX512FP16__ and push
# the count to 2, which the `ifeq (...,1)` test below would treat as "absent".
# If the probe fails, grep prints 0 and the generic branch is taken.
HAS_AVX512 := $(shell $(CXX) -march=native -dM -E -x c++ - < /dev/null 2>/dev/null | grep -c '__AVX512F__')
ifeq ($(strip $(HAS_AVX512)),1)
CXXFLAGS += -mavx512f -mavx512bw -mavx512vl -mfma -DIX_HAS_AVX512
$(info [IX] Detected AVX-512 → CPU_AVX512 backend)
else
# Fallback for every x86_64 host without AVX-512. AVX2 itself is not probed;
# -march=native simply enables whatever the build host supports.
# (-fopenmp is already part of the base CXXFLAGS, so it is not repeated here.)
CXXFLAGS += -march=native
$(info [IX] Detected AVX2 → GENERIC backend)
endif
endif

ifeq ($(ARCH),aarch64)
CXXFLAGS += -DIX_HAS_NEON
$(info [IX] Detected ARM64 → ARM_NEON backend)
endif

ifeq ($(ARCH),armv7l)
# NOTE(review): unlike the aarch64 branch, IX_HAS_NEON is not defined here —
# confirm whether the sources key off the compiler's own NEON macro on ARM32.
CXXFLAGS += -mfpu=neon
$(info [IX] Detected ARM32 → ARM_NEON backend)
endif
# ─── AUTO-DETECT ACCELERATOR SDKs ─────────────────────────────────────────────
# Each SDK detection:
# 1. Checks for the SDK's header or tool
# 2. Sets IX_USE_* define
# 3. Adds the backend object (.o, built from its .c/.cpp source) to BACKEND_OBJS
# 4. Adds SDK-specific link flags
#
# Without SDK → nothing happens. Zero noise.
# ──────────────────────────────────────────────────────────────────────────────
# CPU AVX-512 backend
# Enabled whenever the platform probe left HAS_AVX512 equal to 1. On hosts
# where the probe never ran the variable is empty and this block is skipped.
ifeq ($(HAS_AVX512),1)
# Object for the CPU Q4 GEMM kernel.
BACKEND_OBJS += backends/q4_kernels/cpu/q4_gemm_cpu.o
# C sources get the AVX-512 instruction-set flags plus the backend define.
CFLAGS += -mavx512f -mavx512bw -mavx512vl -mfma -DIX_USE_CPU_AVX512
# C++ sources only need the define here.
CXXFLAGS += -DIX_USE_CPU_AVX512
$(info [IX] → CPU AVX-512 backend: ENABLED)
endif
# Qualcomm Hexagon SDK
# Enabled when $(HEXAGON_SDK_ROOT)/libs/common/qurt/ADSPv* matches at least one
# path. The outer guard skips the probe when HEXAGON_SDK_ROOT is unset or
# empty; without it the pattern would collapse to /libs/common/qurt/ADSPv* and
# the link path to -L/libs.
# NOTE(review): only a -L path is added, no -I for SDK headers — confirm the
# backend source does not need one.
ifneq ($(strip $(HEXAGON_SDK_ROOT)),)
ifneq ($(wildcard $(HEXAGON_SDK_ROOT)/libs/common/qurt/ADSPv*),)
CXXFLAGS += -DIX_USE_HEXAGON
CFLAGS += -DIX_USE_HEXAGON
BACKEND_OBJS += backends/q4_kernels/hexagon/q4_gemm_hexagon.o
LDFLAGS += -L$(HEXAGON_SDK_ROOT)/libs -lhexagon_nn
$(info [IX] → Hexagon HVX backend: ENABLED)
endif
endif
# Qualcomm Snapdragon (Android NDK + Hexagon)
# Enabled when a clang++ is found under the NDK's prebuilt LLVM toolchain.
# The outer guard skips the probe when ANDROID_NDK is unset or empty, so the
# pattern can never degrade to the root-relative /toolchains/llvm/... path.
# NOTE(review): the NDK compiler is only used as a presence check; this block
# does not switch CXX to it — confirm that building the backend with the
# regular $(CXX) is intended.
ifneq ($(strip $(ANDROID_NDK)),)
ifneq ($(wildcard $(ANDROID_NDK)/toolchains/llvm/prebuilt/*/bin/clang++),)
CXXFLAGS += -DIX_USE_SNAPDRAGON
BACKEND_OBJS += backends/q4_kernels/snapdragon/q4_gemm_snapdragon_70b.o
$(info [IX] → Snapdragon Hybrid backend: ENABLED)
endif
endif
# Cerebras SDK
# Enabled when $(CEREBRASESDK)/include/cerebras/ contains at least one header.
# The outer guard skips the probe when CEREBRASESDK is unset or empty, so the
# paths never degrade to /include/cerebras/*.h and -L/lib.
# NOTE(review): the variable is spelled CEREBRASESDK (no underscore, extra
# "E"), unlike POPLAR_SDK / SAMBANOVA_SDK — confirm this is the intended name
# before renaming, since users may already export it.
ifneq ($(strip $(CEREBRASESDK)),)
ifneq ($(wildcard $(CEREBRASESDK)/include/cerebras/*.h),)
CXXFLAGS += -DIX_USE_CEREBRAS
CFLAGS += -DIX_USE_CEREBRAS
BACKEND_OBJS += backends/q4_kernels/cerebras/q4_gemm_wse.o
LDFLAGS += -L$(CEREBRASESDK)/lib -lcerebras_runtime
$(info [IX] → Cerebras WSE backend: ENABLED)
endif
endif
# Groq SDK
# Presence of the runtime header at its fixed system location switches the
# backend on: define for both C and C++, one extra object, one extra library.
ifneq ($(strip $(wildcard /usr/include/groq/groq_runtime.h)),)
BACKEND_OBJS += backends/q4_kernels/groq/q4_gemm_groq_lpu.o
LDFLAGS += -lgroq_runtime
CFLAGS += -DIX_USE_GROQ
CXXFLAGS += -DIX_USE_GROQ
$(info [IX] → Groq LPU backend: ENABLED)
endif
# Intel Gaudi (Habana Synapse)
# Switched on by the Synapse API header in /usr/include. Only the C++ flags
# receive the define; the object is added and libSynapse is linked.
ifneq ($(strip $(wildcard /usr/include/synapse_api.h)),)
BACKEND_OBJS += backends/q4_kernels/gaudi/q4_gemm_gaudi.o
LDFLAGS += -lSynapse
CXXFLAGS += -DIX_USE_GAUDI
$(info [IX] → Gaudi Habana backend: ENABLED)
endif
# AWS Inferentia (Neuron SDK)
# Switched on by the Neuron runtime header under /opt/aws/neuron. Adds the
# backend object, the Neuron library directory and libnrt.
# NOTE(review): /opt/aws/neuron/include is probed but never passed as -I —
# confirm the backend source does not include headers from there.
ifneq ($(strip $(wildcard /opt/aws/neuron/include/nrt/nrt.h)),)
BACKEND_OBJS += backends/q4_kernels/inferentia/q4_gemm_inferentia.o
LDFLAGS += -L/opt/aws/neuron/lib -lnrt
CXXFLAGS += -DIX_USE_INFERENTIA
$(info [IX] → AWS Inferentia backend: ENABLED)
endif
# Xilinx FPGA (Vitis)
# Enabled when $(XILINX_VITIS)/include/ap_int.h exists. The outer guard skips
# the probe when XILINX_VITIS is unset or empty, so the paths never degrade to
# /include/ap_int.h and -L/lib.
# NOTE(review): only a -L path is added, no -I$(XILINX_VITIS)/include —
# confirm the backend source does not need it.
ifneq ($(strip $(XILINX_VITIS)),)
ifneq ($(wildcard $(XILINX_VITIS)/include/ap_int.h),)
CXXFLAGS += -DIX_USE_FPGA_XILINX
BACKEND_OBJS += backends/q4_kernels/fpga_xilinx/q4_gemm_fpga_xilinx.o
LDFLAGS += -L$(XILINX_VITIS)/lib -lxrt_core
$(info [IX] → Xilinx FPGA backend: ENABLED)
endif
endif
# Graphcore IPU (Poplar SDK)
# Enabled when $(POPLAR_SDK)/include/poplar/Engine.hpp exists. The outer guard
# skips the probe when POPLAR_SDK is unset or empty, so the paths never degrade
# to /include/poplar/Engine.hpp and -L/lib.
# NOTE(review): only a -L path is added, no -I$(POPLAR_SDK)/include — confirm
# the backend source does not need it.
ifneq ($(strip $(POPLAR_SDK)),)
ifneq ($(wildcard $(POPLAR_SDK)/include/poplar/Engine.hpp),)
CXXFLAGS += -DIX_USE_GRAPHCORE
BACKEND_OBJS += backends/q4_kernels/graphcore/q4_gemm_ipu.o
LDFLAGS += -L$(POPLAR_SDK)/lib -lpoplar -lpoplin
$(info [IX] → Graphcore IPU backend: ENABLED)
endif
endif
# SambaNova RDU
# Enabled when $(SAMBANOVA_SDK)/include/samba/ contains at least one header.
# The outer guard skips the probe when SAMBANOVA_SDK is unset or empty, so the
# paths never degrade to /include/samba/*.h and -L/lib.
# NOTE(review): only a -L path is added, no -I$(SAMBANOVA_SDK)/include —
# confirm the backend source does not need it.
ifneq ($(strip $(SAMBANOVA_SDK)),)
ifneq ($(wildcard $(SAMBANOVA_SDK)/include/samba/*.h),)
CXXFLAGS += -DIX_USE_SAMBANOVA
BACKEND_OBJS += backends/q4_kernels/sambanova/q4_gemm_sambanova.o
LDFLAGS += -L$(SAMBANOVA_SDK)/lib -lsamba_runtime
$(info [IX] → SambaNova RDU backend: ENABLED)
endif
endif
# Microsoft Maia
# Switched on by the Maia runtime header in /usr/include. Adds the backend
# object, links libmaia_runtime and sets the C++ define.
ifneq ($(strip $(wildcard /usr/include/maia_runtime.h)),)
BACKEND_OBJS += backends/q4_kernels/maia/q4_gemm_maia.o
LDFLAGS += -lmaia_runtime
CXXFLAGS += -DIX_USE_MAIA
$(info [IX] → Microsoft Maia backend: ENABLED)
endif
# ─── BUILD RULES ───────────────────────────────────────────────────────────────
# The single C++ source that is compiled and linked into $(TARGET).
SRC = infer.cpp
# Targets that name an action rather than a file. debug, bench and the run-*
# targets are listed as well: none of them creates a file of that name, and a
# stray file called e.g. "debug" or "bench" would otherwise make the target
# look up to date and silently disable it.
.PHONY: all clean info debug bench run-kimi run-ds7b
# Default goal: build the binary, then print a short banner with its location
# and an example invocation. The banner is emitted by a single printf call,
# one argument per output line.
all: $(TARGET)
	@printf '%s\n' \
		"" \
		"╔══════════════════════════════════════════════════════════════╗" \
		"║ Inference-X Unified — Build Complete ║" \
		"║ Binary: ./$(TARGET)" \
		"║ Usage: ./$(TARGET) <model_path> -p 'prompt' -n 512 ║" \
		"╚══════════════════════════════════════════════════════════════╝"
# Compile $(SRC) and link it with every enabled backend object in one compiler
# invocation. $^ expands to all prerequisites, i.e. $(SRC) followed by
# $(BACKEND_OBJS). $(LDFLAGS) is kept after the inputs because it also carries
# the -l libraries.
$(TARGET): $(SRC) $(BACKEND_OBJS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
# ── Backend compilation rules ─────────────────────────────────────────────────
# Each backend object is built from the same-named source sitting next to it
# under backends/q4_kernels/. Make uses whichever rule has an existing source.
#
# C sources (CPU, Cerebras, Groq, Hexagon): compiled with $(CC) and $(CFLAGS).
backends/q4_kernels/%.o: backends/q4_kernels/%.c
	$(CC) $(CFLAGS) -c $< -o $@
# C++ sources (all remaining backends): compiled with $(CXX) and $(CXXFLAGS).
backends/q4_kernels/%.o: backends/q4_kernels/%.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@
# ─── CONVENIENCE TARGETS ──────────────────────────────────────────────────────
# Both targets only adjust flags and then build $(TARGET). Target-specific
# variables are inherited by prerequisites, so backend objects rebuilt as part
# of these goals see the same flags. Make does not track flag changes: run
# `make clean` first if a binary built with other flags already exists.
#
# debug: unoptimised AddressSanitizer build with assertions enabled.
# Flags are appended rather than replaced so the auto-detected -m*/-DIX_* flags
# and -fopenmp survive — the backend objects are still linked in and need them.
# The later -O0 overrides the base -O3, and -UNDEBUG cancels the base -DNDEBUG.
debug: CXXFLAGS += -g -O0 -fsanitize=address -UNDEBUG
debug: CFLAGS += -g -O0 -fsanitize=address -UNDEBUG
debug: LDFLAGS += -fsanitize=address
debug: $(TARGET)
# bench: release build that always targets the build host's CPU.
bench: CXXFLAGS += -O3 -march=native -DNDEBUG
bench: $(TARGET)
# Model locations used by the run-* targets. The defaults are the original
# hard-coded paths; override per invocation, e.g.
#   make run-ds7b DS7B_MODEL=/path/to/model.gguf
KIMI_MODEL ?= /mnt/data/models/kimi-k2.5/UD-TQ1_0
DS7B_MODEL ?= /mnt/data/winwin_ai/models/gguf/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
# Run with Kimi K2.5 (VPS default)
run-kimi: $(TARGET)
	./$(TARGET) "$(KIMI_MODEL)" -p "Hello" -n 10 -t 0.6
# Run with DeepSeek R1 7B
run-ds7b: $(TARGET)
	./$(TARGET) "$(DS7B_MODEL)" \
		-p "Explain quantum computing" -n 256 -t 0.7
# Delete the build products: the objects of the backends enabled in this
# invocation, and the binary. Objects of backends that are not detected right
# now are not in BACKEND_OBJS and are therefore left alone.
clean:
	rm -f $(BACKEND_OBJS) $(TARGET)
# ─── INFO ──────────────────────────────────────────────────────────────────────
# Print the detected configuration, then a sorted list of all .h/.cpp/.c files
# below the current directory and their combined line count.
info:
	@echo "Architecture: $(ARCH)"
	@echo "Compiler: $(CXX)"
	@echo "Flags: $(CXXFLAGS)"
	@echo "Backends: $(if $(BACKEND_OBJS),$(BACKEND_OBJS),generic only)"
	@echo ""
	@echo "Source tree:"
	@find . \( -name '*.h' -o -name '*.cpp' -o -name '*.c' \) -print | sort
	@echo ""
	@# Concatenate via -exec ... {} + and count once: unlike `xargs wc -l | tail -1`
	@# this is safe for paths containing whitespace and still yields one grand
	@# total when the file list is split across several command invocations.
	@find . \( -name '*.h' -o -name '*.cpp' -o -name '*.c' \) -exec cat {} + | wc -l | awk '{ print $$1 " total" }'