#!/usr/bin/env python3
"""
Organ Architecture — organ_measure.py

Z-measure organ quality: signal vs noise.

CSCI — cross-scale coherence index
θ → 0° : noise (organ adds confusion)
θ → 90° : signal (organ adds knowledge)

Build v935
"""

import struct
import os
import sys
import json
import math
import argparse
from pathlib import Path
|
def read_organ_header(filepath):
    """Parse the binary header of an organ file and return its metadata.

    Layout (all little-endian): u32 name length, UTF-8 name bytes,
    u32 dimension count, that many u64 dimensions, u32 dtype code;
    the tensor payload follows immediately after.

    Returns a dict with keys: name, dims, dtype, data_start (byte offset
    of the payload) and data_size (payload length in bytes).
    """
    with open(filepath, 'rb') as fh:
        def u32():
            return struct.unpack('<I', fh.read(4))[0]

        def u64():
            return struct.unpack('<Q', fh.read(8))[0]

        tensor_name = fh.read(u32()).decode('utf-8', errors='replace')
        shape = [u64() for _ in range(u32())]
        dtype_code = u32()
        payload_offset = fh.tell()
        fh.seek(0, 2)  # jump to EOF to learn the payload size
        payload_bytes = fh.tell() - payload_offset

    return {
        'name': tensor_name,
        'dims': shape,
        'dtype': dtype_code,
        'data_start': payload_offset,
        'data_size': payload_bytes,
    }
def read_organ_data_f32(filepath, max_elements=100000):
    """Read organ data as float values (sampling for large tensors).

    Args:
        filepath: path to an organ .bin file (see read_organ_header).
        max_elements: cap on the number of elements sampled.

    Returns:
        (values, info) where values is a list[float] and info is the
        header dict from read_organ_header().

    For F32/F16 payloads the actual values are decoded (non-finite
    values skipped; F16 additionally skips zeros). For quantized dtypes
    the raw bytes are treated as a uint8 signal and sampled evenly.
    """
    info = read_organ_header(filepath)

    with open(filepath, 'rb') as f:
        f.seek(info['data_start'])
        data = f.read(info['data_size'])

    values = []

    if info['dtype'] == 0:  # F32
        n = min(len(data) // 4, max_elements)
        # struct.iter_unpack decodes the whole slice in one C-level pass.
        for (val,) in struct.iter_unpack('<f', data[:n * 4]):
            if math.isfinite(val):
                values.append(val)
    elif info['dtype'] == 1:  # F16
        n = min(len(data) // 2, max_elements)
        # '<e' is the IEEE 754 half-float format code — replaces the old
        # hand-rolled bit decode. inf/nan are rejected by isfinite (the
        # manual version mapped them to 0.0 and the != 0 test dropped
        # them), and zeros are skipped exactly as before.
        for (val,) in struct.iter_unpack('<e', data[:n * 2]):
            if math.isfinite(val) and val != 0:
                values.append(val)
    else:
        # Quantized: treat raw bytes as a uint8 signal, sampled evenly.
        n = min(len(data), max_elements)
        if n > 0:  # guard: empty payload previously raised ZeroDivisionError
            step = max(1, len(data) // n)
            for i in range(0, min(len(data), n * step), step):
                values.append(float(data[i]))

    return values, info
def _normalized_entropy(values, n):
    """Histogram-based entropy of values, normalized to [0, 1]."""
    n_bins = min(100, max(10, n // 100))
    v_min = min(values)
    v_max = max(values)
    if v_max == v_min:
        return 0
    bin_width = (v_max - v_min) / n_bins
    bins = [0] * n_bins
    for v in values:
        # clamp the top edge into the last bin
        bins[min(int((v - v_min) / bin_width), n_bins - 1)] += 1
    entropy = -sum((b / n) * math.log2(b / n) for b in bins if b > 0)
    max_entropy = math.log2(n_bins)
    return entropy / max_entropy if max_entropy > 0 else 0


def _excess_kurtosis(values, mean, std, n):
    """Population excess kurtosis; 0 when the data is constant."""
    if std <= 1e-10:
        return 0
    return sum((v - mean) ** 4 for v in values) / (n * std ** 4) - 3


def _spacing_cv(values, n):
    """Coefficient of variation of gaps between sorted values.

    Low CV = uniform spacing (noise-like); high CV = structured spacing.
    Only the first 1000 values are used to bound the sort cost.
    """
    sorted_vals = sorted(values[:min(1000, n)])
    diffs = [b - a for a, b in zip(sorted_vals, sorted_vals[1:])]
    if not diffs:
        return 0
    diff_mean = sum(diffs) / len(diffs)
    diff_var = sum((d - diff_mean) ** 2 for d in diffs) / len(diffs)
    diff_std = math.sqrt(diff_var) if diff_var > 0 else 1e-10
    return diff_std / diff_mean if diff_mean > 1e-10 else 0


def _entropy_score(entropy):
    """Score entropy's signal contribution (very high entropy = noise)."""
    if entropy > 0.95:
        return 0.0  # nearly uniform = noise
    if entropy > 0.7:
        return 0.3  # high but structured
    if entropy > 0.3:
        return 0.8  # good information density
    return 0.5      # very concentrated


def _kurtosis_score(abs_kurt):
    """Score |kurtosis|: heavy tails indicate organized structure."""
    if abs_kurt > 10:
        return 1.0  # very structured
    if abs_kurt > 3:
        return 0.7
    if abs_kurt > 1:
        return 0.4
    return 0.1      # Gaussian-like = less structure


def _cv_score(cv):
    """Score spacing CV: non-uniform gaps indicate structure."""
    if cv > 2:
        return 1.0  # highly non-uniform spacing
    if cv > 1:
        return 0.7
    if cv > 0.5:
        return 0.4
    return 0.1


def compute_z_measure(values):
    """
    Compute the Z-measure for a tensor.

    CSCI — cross-scale coherence index

    We measure:
    - Information density (entropy of the value distribution)
    - Scale coherence (how organized the values are)
    - θ = angle between signal and noise (0 = noise, π/2 = signal)

    Args:
        values: list of finite floats sampled from the tensor.

    Returns:
        dict with theta, theta_deg, magnitude, signal_ratio, entropy,
        kurtosis, cv, mean, std, n_values. For degenerate input
        (< 10 values) every key is still present — callers index
        theta_deg/kurtosis/cv unconditionally, so the previous
        short dict caused KeyError on tiny tensors.
    """
    if not values or len(values) < 10:
        return {
            'theta': 0, 'theta_deg': 0.0, 'magnitude': 0,
            'signal_ratio': 0, 'entropy': 0, 'kurtosis': 0,
            'cv': 0, 'mean': 0, 'std': 0, 'n_values': len(values),
        }

    n = len(values)

    # Basic statistics (population variance; std floored to avoid /0).
    mean = sum(values) / n
    variance = sum((v - mean) ** 2 for v in values) / n
    std = math.sqrt(variance) if variance > 0 else 1e-10

    entropy = _normalized_entropy(values, n)
    kurt = _excess_kurtosis(values, mean, std, n)
    cv = _spacing_cv(values, n)
    abs_kurt = abs(kurt)

    # Combine the three indicators into θ ∈ [0, π/2].
    # Signal: high kurtosis, high CV, moderate entropy.
    # Noise: near-zero kurtosis, low CV, very high entropy.
    signal_score = _entropy_score(entropy) + _kurtosis_score(abs_kurt) + _cv_score(cv)
    theta = (signal_score / 3.0) * (math.pi / 2)

    # Magnitude = information content * scale.
    magnitude = entropy * math.log1p(abs_kurt) * (1 + cv)

    # sin(θ): 0 at θ=0 (pure noise), 1 at θ=90° (pure signal).
    signal_ratio = math.sin(theta)

    return {
        'theta': theta,
        'theta_deg': math.degrees(theta),
        'magnitude': magnitude,
        'signal_ratio': signal_ratio,
        'entropy': entropy,
        'kurtosis': kurt,
        'cv': cv,
        'mean': mean,
        'std': std,
        'n_values': n,
    }
def measure_organ(filepath, verbose=False):
    """Z-measure a single organ file and return its annotated report dict."""
    sample, header = read_organ_data_f32(filepath)
    report = compute_z_measure(sample)
    # Attach file-level metadata so downstream reporting can group/label.
    report.update(
        name=header['name'],
        dims=header['dims'],
        dtype=header['dtype'],
        file=str(filepath),
        data_size=header['data_size'],
    )
    return report
def measure_directory(organ_dir, verbose=False):
    """Z-measure every *.bin organ found under a directory tree.

    Files that fail to parse are reported on stdout and skipped.
    """
    reports = []
    for bin_file in sorted(Path(organ_dir).rglob('*.bin')):
        try:
            report = measure_organ(bin_file, verbose)
            reports.append(report)
            if verbose:
                print(f" θ={report['theta_deg']:5.1f}° sig={report['signal_ratio']:.3f} {report['name'][:50]}")
        except Exception as exc:
            print(f" [ERROR] {bin_file}: {exc}")
    return reports
def print_summary(results, title=""):
    """Print a human-readable Z-measure report grouped by organ type.

    Args:
        results: list of report dicts from measure_organ(); each must
            carry 'file', 'name', 'theta_deg', 'signal_ratio',
            'data_size'.
        title: optional suffix appended to the report header line.
    """
    if not results:
        print("No organs measured.")
        return

    # Group by the immediate parent directory name (the organ type).
    # os.path.basename replaces .split('/')[-1], which broke on Windows
    # path separators.
    groups = {}
    for r in results:
        dirname = os.path.basename(os.path.dirname(r['file']))
        groups.setdefault(dirname, []).append(r)

    print(f"\n{'='*70}")
    print(f" Z-MEASURE REPORT {title}")
    print(f"{'='*70}")

    # Constant label table — hoisted out of the loop.
    labels = {
        'skeleton': 'SKELETON (Thought)',
        'organs': 'ORGANS (Knowledge)',
        'embed': 'EMBEDDING (Foundation)',
        'norm': 'NORMALIZATION (Tissue)',
        'adapters': 'ADAPTERS (Personality)',
        'unknown': 'UNKNOWN',
    }

    # Known groups first in canonical order, then any other directory
    # names found on disk (previously those were silently omitted from
    # the per-group sections while still counted in GLOBAL).
    known_order = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    ordered = [g for g in known_order if g in groups]
    ordered += sorted(g for g in groups if g not in known_order)

    for group_name in ordered:
        group = groups[group_name]

        avg_theta = sum(r['theta_deg'] for r in group) / len(group)
        avg_signal = sum(r['signal_ratio'] for r in group) / len(group)
        total_size = sum(r['data_size'] for r in group) / (1024 * 1024)

        print(f"\n {labels.get(group_name, group_name)}")
        print(f" {'─'*50}")
        print(f" Tensors: {len(group):4d} | Size: {total_size:8.1f} MB")
        print(f" Avg θ: {avg_theta:5.1f}° | Avg Signal: {avg_signal:.3f}")

        # Best/worst organs by θ — only worth showing for larger groups.
        sorted_group = sorted(group, key=lambda r: r['theta_deg'], reverse=True)
        if len(sorted_group) > 3:
            print(f" Best: θ={sorted_group[0]['theta_deg']:5.1f}° {sorted_group[0]['name'][:40]}")
            print(f" Worst: θ={sorted_group[-1]['theta_deg']:5.1f}° {sorted_group[-1]['name'][:40]}")

    # Global roll-up over every measured tensor.
    avg_theta = sum(r['theta_deg'] for r in results) / len(results)
    avg_signal = sum(r['signal_ratio'] for r in results) / len(results)
    total_size = sum(r['data_size'] for r in results) / (1024 * 1024)

    print(f"\n {'═'*50}")
    print(f" GLOBAL: {len(results)} tensors | {total_size:.1f} MB | θ={avg_theta:.1f}° | signal={avg_signal:.3f}")
    print(f" Build v935")
    print(f"{'='*70}")
def main():
    """CLI entry point: Z-measure a single organ file or a directory tree."""
    parser = argparse.ArgumentParser(
        description='Organ Architecture — Z-measure organ quality',
        epilog='CSCI v1.0 — Cross-Scale Coherence Index',
    )
    parser.add_argument('--organ', '-o', help='Path to single organ .bin file')
    parser.add_argument('--dir', '-d', help='Path to extracted organs directory')
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--json', action='store_true', help='Output as JSON')
    args = parser.parse_args()

    if args.organ:
        # Single-file mode: print one report (JSON or plain text).
        report = measure_organ(args.organ, args.verbose)
        if args.json:
            print(json.dumps(report, indent=2, default=str))
        else:
            print(f"Organ: {report['name']}")
            print(f"θ = {report['theta_deg']:.1f}° | Signal = {report['signal_ratio']:.3f}")
            print(f"Entropy: {report['entropy']:.3f} | Kurtosis: {report['kurtosis']:.2f} | CV: {report['cv']:.3f}")
    elif args.dir:
        # Directory mode: measure everything, then summarize.
        reports = measure_directory(args.dir, args.verbose)
        if args.json:
            print(json.dumps(reports, indent=2, default=str))
        else:
            title = f"— {os.path.basename(args.dir)}"
            print_summary(reports, title)
    else:
        parser.print_help()
if __name__ == '__main__':  # script entry point
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: 0851280f9f83e9f30e35fd7efff164f806f506f94aa9cd983c8fdae7318a9864
# SIG-ED25519: 7VtyjAri7KRdqUuc+WdkQkp50xKAkVRFqgqLHnJG0BkBltqVwJeYMScAkZ56b4mcsBWPhkj0Y8kS1fd2t/Y+BQ==
# VERIFY: python3 verify_authorship.py organ_measure.py