// ═══════════════════════════════════════════════════════════════════════════════
// INFERENCEX — Expert Profiler (Kimi-Signal-935 Genesis)
// Copyright (C) 2025-2026 Salka Elmadani. All rights reserved.
// Licensed under the Business Source License 1.1 (BSL-1.1)
// See LICENSE file for full terms. Morocco.
//
// NOTICE: This file is part of InferenceX by Salka Elmadani.
// Commercial use by entities with revenue >= $1M USD requires a license.
// Contact: Elmadani.SALKA@proton.me
// ═══════════════════════════════════════════════════════════════════════════════

#pragma once

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <string>
#include <utility>
#include <vector>

namespace ix {

// Counts how often each expert id is reported per layer and writes the
// resulting statistics as a CSV file and as a human-readable summary.
//
// Usage: init() once, record() for every (layer, active expert list) pair,
// then dump_csv() / dump_summary() / get_essential_experts().
//
// The class performs no locking of its own.
class ExpertProfiler {
public:
    // (Re)initialise the profiler for `n_layers` x `n_experts` counters.
    // All counters and the token count are reset to zero (also when called
    // again on an already used profiler) and recording is enabled.
    // Negative sizes are treated as 0.
    void init(int n_layers, int n_experts) {
        n_layers_ = std::max(n_layers, 0);
        n_experts_ = std::max(n_experts, 0);

        const size_t layers = static_cast<size_t>(n_layers_);
        const size_t experts = static_cast<size_t>(n_experts_);

        // assign() rather than resize(): resize() keeps the values of already
        // existing elements, which would leak stale counts into a new run.
        counts_.assign(layers, std::vector<uint64_t>(experts, 0));
        co_occur_.assign(experts, std::vector<uint64_t>(experts, 0));

        total_tokens_ = 0;
        enabled_ = true;
    }

    // Record the experts reported for one call on `layer`.
    //
    //   layer      : layer index; calls outside [0, n_layers) are ignored.
    //   expert_ids : array of `n_active` expert ids; ids outside
    //                [0, n_experts) are skipped individually.
    //   n_active   : number of entries in `expert_ids`.
    //
    // Calls on layer 0 additionally update the pairwise co-occurrence matrix
    // and increment the token counter, so total_tokens() equals the number of
    // accepted layer-0 calls.
    void record(int layer, const int* expert_ids, int n_active) {
        if (!enabled_ || layer < 0 || layer >= n_layers_) return;
        if (expert_ids == nullptr && n_active > 0) return;  // nothing readable

        std::vector<uint64_t>& layer_counts = counts_[layer];
        for (int i = 0; i < n_active; ++i) {
            const int eid = expert_ids[i];
            if (valid_expert(eid)) ++layer_counts[eid];
        }

        if (layer != 0) return;

        // Co-occurrence (layer 0 only, for correlation analysis).
        for (int i = 0; i < n_active; ++i) {
            const int a = expert_ids[i];
            if (!valid_expert(a)) continue;
            for (int j = i + 1; j < n_active; ++j) {
                const int b = expert_ids[j];
                if (!valid_expert(b)) continue;
                ++co_occur_[a][b];
                ++co_occur_[b][a];
            }
        }
        ++total_tokens_;
    }

    // Dump CSV: layer, expert_id, count, pct_of_tokens.
    // Only experts with a non-zero count are written. The last column is
    // count / total_tokens() (0 when no token has been recorded).
    void dump_csv(const char* path) const {
        FILE* f = path ? std::fopen(path, "w") : nullptr;
        if (!f) {
            std::printf("[PROFILER] Cannot write %s\n", path ? path : "(null)");
            return;
        }

        std::fprintf(f, "layer,expert_id,count,pct_of_tokens\n");
        for (int l = 0; l < n_layers_; ++l) {
            for (int e = 0; e < n_experts_; ++e) {
                const uint64_t count = counts_[l][e];
                if (count == 0) continue;
                const double share =
                    total_tokens_ > 0 ? (double)count / total_tokens_ : 0.0;
                // %llu: unsigned long is only 32 bits wide on LLP64 platforms.
                std::fprintf(f, "%d,%d,%llu,%.6f\n", l, e,
                             static_cast<unsigned long long>(count), share);
            }
        }
        std::fclose(f);

        std::printf("[PROFILER] Expert activations → %s (%llu tokens)\n", path,
                    static_cast<unsigned long long>(total_tokens_));
    }

    // Dump summary: one line per layer (active/dead experts, number of top
    // experts needed to cover 90/95/99% of that layer's activations, busiest
    // expert), followed by the averages of those three numbers and rough
    // model-size estimates.
    //
    // The averages are taken over the layers that recorded at least one
    // activation; layers without data do not dilute them.
    void dump_summary(const char* path) const {
        FILE* f = path ? std::fopen(path, "w") : nullptr;
        if (!f) {
            std::printf("[PROFILER] Cannot write %s\n", path ? path : "(null)");
            return;
        }

        std::fprintf(f, "# KIMI-SIGNAL-935 Expert Profile | %llu tokens\n\n",
                     static_cast<unsigned long long>(total_tokens_));

        double sum_90 = 0.0, sum_95 = 0.0, sum_99 = 0.0;
        int layers_with_data = 0;

        std::vector<CountedExpert> ranked;
        ranked.reserve(static_cast<size_t>(n_experts_));

        for (int l = 0; l < n_layers_; ++l) {
            // Collect the active experts of this layer, busiest first.
            ranked.clear();
            uint64_t layer_total = 0;
            for (int e = 0; e < n_experts_; ++e) {
                const uint64_t count = counts_[l][e];
                if (count == 0) continue;
                ranked.emplace_back(count, e);
                layer_total += count;
            }
            std::sort(ranked.begin(), ranked.end(), by_count_desc);

            const Coverage cov = coverage(ranked, layer_total);
            const int active = (int)ranked.size();
            const int dead = n_experts_ - active;

            std::fprintf(f,
                         "Layer %2d: %3d active, %3d dead | "
                         "90%%=%3d experts, 95%%=%3d, 99%%=%3d | "
                         "top expert: #%d (%.1f%%)\n",
                         l, active, dead, cov.n_90, cov.n_95, cov.n_99,
                         ranked.empty() ? -1 : ranked[0].second,
                         ranked.empty() ? 0.0
                                        : 100.0 * ranked[0].first / layer_total);

            if (layer_total > 0) {
                sum_90 += cov.n_90;
                sum_95 += cov.n_95;
                sum_99 += cov.n_99;
                ++layers_with_data;
            }
        }

        // Global recommendation
        std::fprintf(f, "\n# PRUNING RECOMMENDATION\n");

        const double denom = layers_with_data > 0 ? layers_with_data : 1;
        const double avg_90 = sum_90 / denom;
        const double avg_95 = sum_95 / denom;
        const double avg_99 = sum_99 / denom;

        // Hard-coded reference figures carried over unchanged.
        // NOTE(review): 226 GB full size and the "8" subtracted from the
        // expert count look model-specific (original comment: "non-shared")
        // — confirm before using with another model.
        const double size_full = 226.0;  // GB
        const double n = n_experts_;
        const double expert_ratio =
            n_experts_ > 0 ? std::max(0.0, (n - 8) / n) : 0.0;

        // Estimated size when only `kept` experts remain. The kept fraction is
        // capped at 1 so that asking for more experts than exist reports the
        // full size instead of something larger.
        const auto estimate_gb = [&](double kept) {
            const double kept_frac =
                n_experts_ > 0 ? std::min(1.0, kept / n) : 1.0;
            return size_full * (1.0 - expert_ratio * (1.0 - kept_frac));
        };

        std::fprintf(f, "\nAverage experts for 90%% signal: %.0f\n", avg_90);
        std::fprintf(f, "Average experts for 95%% signal: %.0f\n", avg_95);
        std::fprintf(f, "Average experts for 99%% signal: %.0f\n", avg_99);
        std::fprintf(f, "\nEstimated model sizes:\n");
        std::fprintf(f, " 32 experts: ~%.0f GB\n", estimate_gb(32.0));
        std::fprintf(f, " 64 experts: ~%.0f GB\n", estimate_gb(64.0));
        std::fprintf(f, " 128 experts: ~%.0f GB\n", estimate_gb(128.0));
        std::fclose(f);

        std::printf("[PROFILER] Summary → %s\n", path);
    }

    // Get the top-N experts globally (union across all layers).
    //
    // For every layer the `top_n_per_layer` busiest experts are selected and
    // their counts summed per expert id. Returns the ids with a non-zero sum,
    // highest sum first; ties are ordered by ascending expert id.
    // A non-positive `top_n_per_layer` yields an empty result.
    std::vector<int> get_essential_experts(int top_n_per_layer) const {
        std::vector<int> result;
        const int take = std::min(top_n_per_layer, n_experts_);
        if (take <= 0) return result;

        std::vector<uint64_t> global(static_cast<size_t>(n_experts_), 0);
        std::vector<CountedExpert> ranked;
        ranked.reserve(static_cast<size_t>(n_experts_));

        for (int l = 0; l < n_layers_; ++l) {
            ranked.clear();
            for (int e = 0; e < n_experts_; ++e) {
                ranked.emplace_back(counts_[l][e], e);
            }
            // Only the first `take` entries have to be in order.
            std::partial_sort(ranked.begin(), ranked.begin() + take,
                              ranked.end(), by_count_desc);
            for (int i = 0; i < take; ++i) {
                global[ranked[i].second] += ranked[i].first;
            }
        }

        // Sort globally
        std::vector<CountedExpert> gsorted;
        for (int e = 0; e < n_experts_; ++e) {
            if (global[e] > 0) gsorted.emplace_back(global[e], e);
        }
        std::sort(gsorted.begin(), gsorted.end(), by_count_desc);

        result.reserve(gsorted.size());
        for (const CountedExpert& entry : gsorted) result.push_back(entry.second);
        return result;
    }

    uint64_t total_tokens() const { return total_tokens_; }
    bool enabled() const { return enabled_; }
    void enable() { enabled_ = true; }
    void disable() { enabled_ = false; }

private:
    // (activation count, expert id)
    using CountedExpert = std::pair<uint64_t, int>;

    // Number of top experts needed to reach 90/95/99% of a layer's
    // activations; 0 means "no data".
    struct Coverage {
        int n_90 = 0;
        int n_95 = 0;
        int n_99 = 0;
    };

    // True when `eid` is a usable index into the expert dimension.
    bool valid_expert(int eid) const { return eid >= 0 && eid < n_experts_; }

    // Ordering: higher count first, lower expert id first on equal counts.
    static bool by_count_desc(const CountedExpert& a, const CountedExpert& b) {
        if (a.first != b.first) return a.first > b.first;
        return a.second < b.second;
    }

    // Walk `ranked` (sorted busiest-first, summing to `total`) and report how
    // many leading entries are needed to reach each coverage threshold.
    static Coverage coverage(const std::vector<CountedExpert>& ranked,
                             uint64_t total) {
        Coverage cov;
        if (total == 0) return cov;

        uint64_t cumulative = 0;
        for (size_t i = 0; i < ranked.size(); ++i) {
            cumulative += ranked[i].first;
            const double frac = (double)cumulative / total;
            const int needed = (int)i + 1;
            if (cov.n_90 == 0 && frac >= 0.90) cov.n_90 = needed;
            if (cov.n_95 == 0 && frac >= 0.95) cov.n_95 = needed;
            if (cov.n_99 == 0 && frac >= 0.99) cov.n_99 = needed;
            if (cov.n_99 != 0) break;  // highest threshold reached, all are set
        }
        return cov;
    }

    int n_layers_ = 0;
    int n_experts_ = 0;
    std::vector<std::vector<uint64_t>> counts_;    // [layer][expert_id]
    std::vector<std::vector<uint64_t>> co_occur_;  // [expert][expert]
    uint64_t total_tokens_ = 0;
    bool enabled_ = false;
};

// Global profiler instance
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit that includes this file its own separate instance. If the
// profiler must be shared across several .cpp files, switch to a C++17
// `inline` variable or an extern declaration with a single definition.
static ExpertProfiler g_expert_profiler;

}  // namespace ix