# organ-architecture/organ_measure.py — 348 lines, 11 KiB, Python
#!/usr/bin/env python3
"""
Organ Architecture — organ_measure.py
Z-measure organ quality: signal vs noise.
CSCI — cross-scale coherence index
θ → 0° : noise (organ adds confusion)
θ → 90° : signal (organ adds knowledge)
Build v935
"""
import struct
import os
import sys
import json
import math
import argparse
from pathlib import Path
def read_organ_header(filepath):
    """Parse the binary header of an organ file and return its metadata.

    Header layout (all little-endian): u32 name length, UTF-8 name
    bytes, u32 dimension count, one u64 per dimension, u32 dtype code.
    Everything after the header is the tensor payload.

    Returns a dict with keys: name, dims, dtype, data_start (byte
    offset of the payload), data_size (payload length in bytes).
    """
    with open(filepath, 'rb') as fh:
        (name_len,) = struct.unpack('<I', fh.read(4))
        name = fh.read(name_len).decode('utf-8', errors='replace')
        (n_dims,) = struct.unpack('<I', fh.read(4))
        dims = []
        for _ in range(n_dims):
            dims.append(struct.unpack('<Q', fh.read(8))[0])
        (dtype,) = struct.unpack('<I', fh.read(4))
        payload_offset = fh.tell()
        fh.seek(0, os.SEEK_END)  # payload size = file end minus header end
        payload_size = fh.tell() - payload_offset
    return {
        'name': name,
        'dims': dims,
        'dtype': dtype,
        'data_start': payload_offset,
        'data_size': payload_size,
    }
def read_organ_data_f32(filepath, max_elements=100000):
    """Decode an organ file's payload into a list of Python floats.

    F32 (dtype 0) payloads keep every finite value; F16 (dtype 1)
    payloads keep finite, non-zero values (inf/nan/zero carry no usable
    signal for the Z-measure).  Any other dtype is assumed quantized and
    is sampled as raw uint8 bytes so it can still be analyzed as a
    signal.  At most max_elements values are read.

    Returns (values, header_info) where header_info comes from
    read_organ_header().

    Fixes: an empty payload on a quantized dtype previously crashed with
    ZeroDivisionError (len(data) // 0); the manual IEEE 754 half-float
    decode is replaced by struct's native '<e' format, which handles
    sign/exponent/subnormals identically.
    """
    info = read_organ_header(filepath)
    with open(filepath, 'rb') as f:
        f.seek(info['data_start'])
        data = f.read(info['data_size'])
    values = []
    if info['dtype'] == 0:  # F32: little-endian IEEE 754 single precision
        n = min(len(data) // 4, max_elements)
        for (val,) in struct.iter_unpack('<f', data[:n * 4]):
            if math.isfinite(val):
                values.append(val)
    elif info['dtype'] == 1:  # F16: '<e' is IEEE 754 binary16
        n = min(len(data) // 2, max_elements)
        for (val,) in struct.iter_unpack('<e', data[:n * 2]):
            # inf/nan decode as non-finite; exact zeros are dropped as before
            if math.isfinite(val) and val != 0:
                values.append(val)
    elif data:  # quantized: stride-sample the raw bytes as a uint8 signal
        n = min(len(data), max_elements)
        step = max(1, len(data) // n)
        for i in range(0, min(len(data), n * step), step):
            values.append(float(data[i]))
    # (empty payload -> empty values list, no crash)
    return values, info
def compute_z_measure(values):
    """
    Z-measure a sampled tensor: how much of it looks like signal vs noise.

    CSCI — cross-scale coherence index.  Three indicators are combined:
      - normalized histogram entropy (information density),
      - excess kurtosis (structural sharpness vs Gaussian),
      - coefficient of variation of gaps between sorted values
        (scale coherence: uniform spacing reads as noise),
    into an angle theta in [0, pi/2]; sin(theta) is the signal ratio
    (theta -> 0 means noise, theta -> pi/2 means signal).

    Returns a dict with theta, theta_deg, magnitude, signal_ratio,
    entropy, kurtosis, cv, mean, std, n_values (short dict for <10 values).
    """
    if not values or len(values) < 10:
        return {'theta': 0, 'magnitude': 0, 'signal_ratio': 0, 'entropy': 0}

    count = len(values)

    # Basic moments (population variance; std floored to avoid div-by-zero).
    mu = sum(values) / count
    var = sum((x - mu) ** 2 for x in values) / count
    sigma = math.sqrt(var) if var > 0 else 1e-10

    # Histogram entropy, normalized to [0, 1] by the max possible log2(bins).
    bins_wanted = min(100, max(10, count // 100))
    lo, hi = min(values), max(values)
    if hi == lo:
        entropy = 0
    else:
        width = (hi - lo) / bins_wanted
        counts = [0] * bins_wanted
        for x in values:
            counts[min(int((x - lo) / width), bins_wanted - 1)] += 1
        entropy = -sum((c / count) * math.log2(c / count)
                       for c in counts if c > 0)
        cap = math.log2(bins_wanted)
        entropy = entropy / cap if cap > 0 else 0

    # Excess kurtosis: high magnitude = peaked/structured, ~0 = Gaussian-like.
    if sigma > 1e-10:
        kurt = sum((x - mu) ** 4 for x in values) / (count * sigma ** 4) - 3
    else:
        kurt = 0

    # Scale coherence: CV of gaps between sorted values (capped sample).
    # Low CV = evenly spaced (noise-like), high CV = structured steps.
    sample = sorted(values[:min(1000, count)])
    gaps = [b - a for a, b in zip(sample, sample[1:])]
    if gaps:
        g_mu = sum(gaps) / len(gaps)
        g_var = sum((g - g_mu) ** 2 for g in gaps) / len(gaps)
        g_sigma = math.sqrt(g_var) if g_var > 0 else 1e-10
        cv = g_sigma / g_mu if g_mu > 1e-10 else 0
    else:
        cv = 0

    # Score the three indicators via threshold ladders (first match wins).
    score = 0.0
    if entropy > 0.95:
        pass  # near-uniform histogram = noise: contributes nothing
    else:
        for floor, pts in ((0.7, 0.3), (0.3, 0.8), (-1.0, 0.5)):
            if entropy > floor:
                score += pts
                break
    sharp = abs(kurt)
    for floor, pts in ((10, 1.0), (3, 0.7), (1, 0.4), (-1, 0.1)):
        if sharp > floor:
            score += pts
            break
    for floor, pts in ((2, 1.0), (1, 0.7), (0.5, 0.4), (-1, 0.1)):
        if cv > floor:
            score += pts
            break

    # theta in [0, pi/2]; magnitude = information content scaled by structure.
    theta = (score / 3.0) * (math.pi / 2)
    magnitude = entropy * math.log1p(sharp) * (1 + cv)
    return {
        'theta': theta,
        'theta_deg': math.degrees(theta),
        'magnitude': magnitude,
        'signal_ratio': math.sin(theta),  # 0 at theta=0, 1 at theta=pi/2
        'entropy': entropy,
        'kurtosis': kurt,
        'cv': cv,
        'mean': mu,
        'std': sigma,
        'n_values': count,
    }
def measure_organ(filepath, verbose=False):
    """Z-measure one organ .bin file.

    Returns the compute_z_measure() dict augmented with the organ's
    name, dims, dtype, source path and payload size.  (verbose is
    accepted for interface symmetry; it is not used here.)
    """
    sampled, header = read_organ_data_f32(filepath)
    metrics = compute_z_measure(sampled)
    metrics.update(
        name=header['name'],
        dims=header['dims'],
        dtype=header['dtype'],
        file=str(filepath),
        data_size=header['data_size'],
    )
    return metrics
def measure_directory(organ_dir, verbose=False):
    """Z-measure every *.bin organ under organ_dir, recursively.

    Files that fail to parse are reported and skipped rather than
    aborting the scan.  Returns the list of per-organ metric dicts in
    sorted path order.
    """
    measured = []
    for organ_file in sorted(Path(organ_dir).rglob('*.bin')):
        try:
            metrics = measure_organ(organ_file, verbose)
            measured.append(metrics)
            if verbose:
                print(f" θ={metrics['theta_deg']:5.1f}° sig={metrics['signal_ratio']:.3f} {metrics['name'][:50]}")
        except Exception as exc:
            print(f" [ERROR] {organ_file}: {exc}")
    return measured
def print_summary(results, title=""):
    """Print a grouped Z-measure report to stdout.

    Organs are grouped by the immediate parent directory of each file
    (the organ type).  Known groups print in a fixed order; any other
    group prints afterwards in name order instead of being silently
    dropped (previously only the six hard-coded names were reported).

    Fixes: the `''*50` separator lines rendered as empty strings — the
    multiplied character had been lost; restored as '─'.  Grouping now
    uses os.path so it works with native path separators, and the
    labels dict is built once instead of per loop iteration.
    """
    if not results:
        print("No organs measured.")
        return

    # Group by the organ file's parent directory name (portable:
    # splitting on '/' breaks on Windows-style paths).
    groups = {}
    for r in results:
        dirname = os.path.basename(os.path.dirname(r['file']))
        groups.setdefault(dirname, []).append(r)

    labels = {
        'skeleton': 'SKELETON (Thought)',
        'organs': 'ORGANS (Knowledge)',
        'embed': 'EMBEDDING (Foundation)',
        'norm': 'NORMALIZATION (Tissue)',
        'adapters': 'ADAPTERS (Personality)',
        'unknown': 'UNKNOWN',
    }
    known_order = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    ordered = [g for g in known_order if g in groups]
    ordered += sorted(g for g in groups if g not in known_order)

    print(f"\n{'='*70}")
    print(f" Z-MEASURE REPORT {title}")
    print(f"{'='*70}")
    for group_name in ordered:
        group = groups[group_name]
        avg_theta = sum(r['theta_deg'] for r in group) / len(group)
        avg_signal = sum(r['signal_ratio'] for r in group) / len(group)
        total_size = sum(r['data_size'] for r in group) / (1024 * 1024)
        print(f"\n {labels.get(group_name, group_name)}")
        print(f" {'─'*50}")
        print(f" Tensors: {len(group):4d} | Size: {total_size:8.1f} MB")
        print(f" Avg θ: {avg_theta:5.1f}° | Avg Signal: {avg_signal:.3f}")
        # Show the extremes only when the group is big enough to matter.
        sorted_group = sorted(group, key=lambda r: r['theta_deg'], reverse=True)
        if len(sorted_group) > 3:
            print(f" Best: θ={sorted_group[0]['theta_deg']:5.1f}° {sorted_group[0]['name'][:40]}")
            print(f" Worst: θ={sorted_group[-1]['theta_deg']:5.1f}° {sorted_group[-1]['name'][:40]}")

    # Global aggregates across every measured tensor.
    avg_theta = sum(r['theta_deg'] for r in results) / len(results)
    avg_signal = sum(r['signal_ratio'] for r in results) / len(results)
    total_size = sum(r['data_size'] for r in results) / (1024 * 1024)
    print(f"\n {'─'*50}")
    print(f" GLOBAL: {len(results)} tensors | {total_size:.1f} MB | θ={avg_theta:.1f}° | signal={avg_signal:.3f}")
    print(f" Build v935")
    print(f"{'='*70}")
def main():
    """CLI entry point: Z-measure one organ (--organ) or a whole
    directory tree (--dir), as plain text or JSON (--json)."""
    parser = argparse.ArgumentParser(
        description='Organ Architecture — Z-measure organ quality',
        epilog='CSCI v1.0 — Cross-Scale Coherence Index'
    )
    parser.add_argument('--organ', '-o', help='Path to single organ .bin file')
    parser.add_argument('--dir', '-d', help='Path to extracted organs directory')
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--json', action='store_true', help='Output as JSON')
    args = parser.parse_args()

    if args.organ:
        metrics = measure_organ(args.organ, args.verbose)
        if args.json:
            print(json.dumps(metrics, indent=2, default=str))
            return
        print(f"Organ: {metrics['name']}")
        print(f"θ = {metrics['theta_deg']:.1f}° | Signal = {metrics['signal_ratio']:.3f}")
        print(f"Entropy: {metrics['entropy']:.3f} | Kurtosis: {metrics['kurtosis']:.2f} | CV: {metrics['cv']:.3f}")
    elif args.dir:
        results = measure_directory(args.dir, args.verbose)
        if args.json:
            print(json.dumps(results, indent=2, default=str))
        else:
            print_summary(results, f"{os.path.basename(args.dir)}")
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: 0851280f9f83e9f30e35fd7efff164f806f506f94aa9cd983c8fdae7318a9864
# SIG-ED25519: 7VtyjAri7KRdqUuc+WdkQkp50xKAkVRFqgqLHnJG0BkBltqVwJeYMScAkZ56b4mcsBWPhkj0Y8kS1fd2t/Y+BQ==
# VERIFY: python3 verify_authorship.py organ_measure.py