// Copyright (C) 2024-2026 Salka Elmadani. All rights reserved.
// INPI eSoleau: 7phf-Ueye-2nWr-Vsgu — BSL-1.1
// Inference-X — Universal Inference Protocol
// Morocco
// Backend Manager Header — Device enumeration and routing
#pragma once

// FIX(review): the four #include directives had lost their header names and
// std::vector had lost its <DeviceInfo> template argument (angle brackets were
// stripped, likely by an HTML/markup pass). Restored from the names used below.
#include <cstddef>  // size_t
#include <mutex>    // std::mutex
#include <string>   // std::string
#include <vector>   // std::vector

namespace inference_x {
namespace compute {

/// Compute backends the runtime can route work to.
/// `Auto` (0) asks the manager to pick the best available backend.
/// Enumerator order is part of the ABI — do not reorder.
enum class ComputeBackend {
  Auto = 0,
  CPU,
  CUDA,
  ROCm,
  Metal,
  Vulkan,
  OpenCL,
  Hexagon,
  Snapdragon,
  TPU,
  Groq,
  Cerebras,
  FPGA,
  Gaudi,
  Inferentia,
  Maia,
  SambaNova,
  GraphCore,
  ARM_NEON,
  WebGPU
};

/// Error codes returned by backend-management operations.
/// `Success` (0) means the call completed; all other values are failures.
enum class ComputeError {
  Success = 0,
  NotInitialized,
  InvalidDevice,
  NotSupported,
  OutOfMemory,
  LaunchFailed,
  SyncFailed
};

/// Static description of a single compute device, filled in by the
/// backend-specific `query_*_info` routines.
/// NOTE(review): the `compute_capability_*`, `num_sm` and `warp_size` fields
/// are CUDA-flavored; presumably non-CUDA backends fill best-effort
/// equivalents or zeros — confirm against the .cpp implementation.
struct DeviceInfo {
  ComputeBackend backend;        // which backend owns this device
  int device_id;                 // backend-local device index
  std::string name;              // human-readable device name
  size_t total_memory;           // total device memory, bytes
  size_t free_memory;            // free device memory at query time, bytes
  int compute_capability_major;  // e.g. CUDA CC major
  int compute_capability_minor;  // e.g. CUDA CC minor
  int num_sm;                    // streaming multiprocessor count
  int max_threads_per_block;     // launch-configuration limit
  int warp_size;                 // SIMD lane width
  bool supports_fp16;            // half-precision support
  bool supports_bf16;            // bfloat16 support
  bool supports_int8;            // int8 support
};

/// Process-wide singleton that probes the machine for compute backends,
/// enumerates their devices, and answers routing queries.
///
/// Thread-safety: `mutex_` is declared `mutable` so const query methods can
/// lock it; the locking discipline itself lives in the .cpp file.
class BackendManager {
 public:
  /// Returns the process-wide singleton instance.
  static BackendManager& instance();

  /// Probes available backends and populates the device list.
  /// @return ComputeError::Success on success, otherwise a failure code.
  ComputeError initialize();

  /// True if at least one device of the given backend was detected.
  bool is_available(ComputeBackend backend) const;

  /// Number of devices detected for the given backend.
  int get_device_count(ComputeBackend backend) const;

  /// All devices discovered by initialize(), across every backend.
  const std::vector<DeviceInfo>& get_devices() const { return devices_; }

  /// Picks the most capable device among those discovered.
  DeviceInfo get_best_device() const;

  /// Human-readable name for a backend enumerator.
  const char* backend_name(ComputeBackend b) const;

 private:
  BackendManager() = default;  // singleton: construct via instance() only

  // Per-backend probing helpers used by initialize().
  ComputeError initialize_cpu();
  ComputeError initialize_cuda();
  ComputeError initialize_rocm();

  // Per-backend device-description helpers.
  DeviceInfo query_cpu_info() const;
  DeviceInfo query_cuda_info(int device_id) const;
  DeviceInfo query_rocm_info(int device_id) const;

  mutable std::mutex mutex_;   // guards the state below; mutable for const queries
  bool initialized_ = false;
  bool cpu_available_ = false;
  bool cuda_available_ = false;
  bool rocm_available_ = false;
  int cuda_device_count_ = 0;
  int rocm_device_count_ = 0;
  std::vector<DeviceInfo> devices_;  // all discovered devices, all backends
};

}  // namespace compute
}  // namespace inference_x