// inference-x/compute/backend_manager.h
// Salka Elmadani ec36668cf5 Inference-X v1.0 — Universal AI Inference Engine
// Better output from the same model. Fused computation, adaptive precision,
// surgical expert loading. 305 KB, 19 backends, zero dependencies.
//
// https://inference-x.com
// 2026-02-23 07:10:47 +00:00
//
// 75 lines
// 2.0 KiB
// C++

// Copyright (C) 2024-2026 Salka Elmadani. All rights reserved.
// INPI eSoleau: 7phf-Ueye-2nWr-Vsgu — BSL-1.1
// Inference-X — Universal Inference Protocol
// Morocco
// Backend Manager Header — Device enumeration and routing
#pragma once
#include <string>
#include <vector>
#include <mutex>
#include <cstdint>
namespace inference_x {
namespace compute {
// Hardware / runtime targets a model can be dispatched to.
// `Auto` asks the manager to pick the best available backend.
// Sized to 8 bits: 20 enumerators fit easily, and the value is stored
// per-device in DeviceInfo (clang-tidy: performance-enum-size).
// NOTE: do not reorder — numeric values are part of the public contract.
enum class ComputeBackend : std::uint8_t {
Auto = 0, CPU, CUDA, ROCm, Metal, Vulkan, OpenCL,
Hexagon, Snapdragon, TPU, Groq, Cerebras, FPGA,
Gaudi, Inferentia, Maia, SambaNova, GraphCore,
ARM_NEON, WebGPU
};
// Status codes returned by backend-manager operations.
// `Success` is zero so callers can test `err != ComputeError::Success`.
// Sized to 8 bits — seven values (clang-tidy: performance-enum-size).
// NOTE: do not reorder — numeric values are part of the public contract.
enum class ComputeError : std::uint8_t {
Success = 0, NotInitialized, InvalidDevice, NotSupported,
OutOfMemory, LaunchFailed, SyncFailed
};
// Capability snapshot of one enumerated compute device.
// Plain aggregate filled in by the query_*_info() helpers.
// All members carry zero-equivalent default initializers so a
// default-constructed DeviceInfo is fully determinate (previously the
// numeric/bool fields were left uninitialized). The defaults match what
// `DeviceInfo{}` value-initialization already produced, so aggregate
// users see no behavior change.
struct DeviceInfo {
ComputeBackend backend = ComputeBackend::Auto; // backend that owns this device
int device_id = 0;                  // backend-local device ordinal
std::string name;                   // human-readable device name
size_t total_memory = 0;            // presumably bytes — TODO confirm against query helpers
size_t free_memory = 0;             // presumably bytes, at query time — TODO confirm
int compute_capability_major = 0;   // GPU compute capability; 0 when not applicable
int compute_capability_minor = 0;
int num_sm = 0;                     // presumably SM / compute-unit count — TODO confirm
int max_threads_per_block = 0;
int warp_size = 0;                  // warp/wavefront width; 0 when not applicable
bool supports_fp16 = false;         // half-precision float support
bool supports_bf16 = false;         // bfloat16 support
bool supports_int8 = false;         // 8-bit integer compute support
};
// Process-wide singleton that probes the compute backends (CPU, CUDA,
// ROCm), records which are available, and exposes the discovered devices.
// Copy and move are explicitly deleted: there is exactly one manager
// (previously they were only implicitly deleted via the std::mutex
// member; spelling it out documents the singleton intent and gives
// clearer compiler diagnostics).
// Thread-safety: mutex_ is mutable so const queries can lock it.
// NOTE(review): the inline get_devices() returns a reference without
// taking mutex_ — presumably callers must not race it against
// initialize(); confirm against the .cpp.
class BackendManager {
public:
// Accessor for the single shared instance.
static BackendManager& instance();
// Singleton — copying or moving the manager is always a bug.
BackendManager(const BackendManager&) = delete;
BackendManager& operator=(const BackendManager&) = delete;
// Probes all supported backends and populates the device list.
// Returns ComputeError::Success on success; ignoring the result
// hides initialization failures, hence [[nodiscard]].
[[nodiscard]] ComputeError initialize();
// True if at least one device for `backend` was discovered.
[[nodiscard]] bool is_available(ComputeBackend backend) const;
// Number of devices discovered for `backend`.
[[nodiscard]] int get_device_count(ComputeBackend backend) const;
// All discovered devices, across every backend. Reference stays valid
// while the singleton lives; see the locking note on the class.
[[nodiscard]] const std::vector<DeviceInfo>& get_devices() const { return devices_; }
// Heuristically chosen best device (selection policy lives in the .cpp).
[[nodiscard]] DeviceInfo get_best_device() const;
// Static human-readable name for a backend enumerator.
[[nodiscard]] const char* backend_name(ComputeBackend b) const;
private:
BackendManager() = default;
// Per-backend probe helpers; each sets its *_available_ flag.
ComputeError initialize_cpu();
ComputeError initialize_cuda();
ComputeError initialize_rocm();
// Per-device capability queries used to fill devices_.
DeviceInfo query_cpu_info() const;
DeviceInfo query_cuda_info(int device_id) const;
DeviceInfo query_rocm_info(int device_id) const;
mutable std::mutex mutex_;          // guards initialization state
bool initialized_ = false;          // set once initialize() has run
bool cpu_available_ = false;
bool cuda_available_ = false;
bool rocm_available_ = false;
int cuda_device_count_ = 0;
int rocm_device_count_ = 0;
std::vector<DeviceInfo> devices_;   // all discovered devices, all backends
};
} // namespace compute
} // namespace inference_x