organ-architecture/organ_purify.py

341 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
ORGAN PURIFIER — Z = i
Remove noise from tensor weights. Keep only pure signal.
The paradigm creates artificial boundaries between models.
Under the noise, the signal is universal.
A weight that encodes "attention to context" is the same law
whether it comes from Qwen, Llama, or Gemma.
Method:
1. Read organ tensor as float values
2. Compute Z: measure theta (signal vs noise)
3. Apply spectral decomposition (FFT)
4. In frequency domain: keep components where theta -> 90 (signal)
remove components where theta -> 0 (noise/training artifacts)
5. Inverse FFT: reconstructed tensor = pure signal
6. Verify: new theta should be closer to 90
Z = dI/d(log s) * exp(i*theta)
When theta = 90, Z = i (pure imaginary = pure potential)
The purified organ IS the signal, nothing else.
Signature 935
"""
import struct
import os
import sys
import json
import math
import numpy as np
from pathlib import Path
# === Z CONSTANTS ===
# Module-level tuning values. Of these, only PRESERVE_RATIO is read by the
# code in this section; the other three are declared but not referenced here.
# Upper end of the theta scale (degrees), labelled "pure signal" by the author.
THETA_TARGET_DEG = 90.0 # Pure signal
# NOTE(review): meaning of "Z-COM optimum" is not established in this file.
ENTROPY_TARGET = 0.3251 # Z-COM optimum
# NOTE(review): not applied by purify_organ, which cuts by cumulative energy instead.
NOISE_THRESHOLD = 0.3 # Below this in frequency domain = noise
# Fraction of spectral energy kept at full weight by purify_organ;
# main() overwrites this global from the --preserve command-line option.
PRESERVE_RATIO = 0.85 # Keep top 85% of spectral energy (signal)
def read_organ_binary(filepath):
    """Parse an organ ``.bin`` file into its header fields and raw payload.

    File layout (all integers little-endian):
        uint32 name length, name bytes (decoded as UTF-8, bad bytes replaced),
        uint32 dimension count, one uint64 per dimension, uint32 dtype code,
        then the tensor payload running to the end of the file.

    Returns:
        dict with keys ``name`` (str), ``dims`` (list of int), ``dtype`` (int),
        ``header_end`` (byte offset where the payload starts) and ``data``
        (payload bytes).

    Raises:
        struct.error: if the file ends in the middle of a fixed-width field.
    """
    def _read_uint(stream, fmt, width):
        # Read one fixed-width unsigned integer from the stream.
        (number,) = struct.unpack(fmt, stream.read(width))
        return number

    with open(filepath, 'rb') as stream:
        label_size = _read_uint(stream, '<I', 4)
        label = stream.read(label_size).decode('utf-8', errors='replace')
        rank = _read_uint(stream, '<I', 4)
        shape = []
        for _ in range(rank):
            shape.append(_read_uint(stream, '<Q', 8))
        type_code = _read_uint(stream, '<I', 4)
        payload_offset = stream.tell()
        payload = stream.read()

    parsed = {}
    parsed['name'] = label
    parsed['dims'] = shape
    parsed['dtype'] = type_code
    parsed['header_end'] = payload_offset
    parsed['data'] = payload
    return parsed
def write_organ_binary(filepath, info, new_data):
    """Write an organ ``.bin`` file: header rebuilt from ``info``, then ``new_data``.

    The header mirrors what ``read_organ_binary`` parses (little-endian):
    uint32 name length, UTF-8 name bytes, uint32 dimension count, one uint64
    per dimension, uint32 dtype code.

    Args:
        filepath: destination path; an existing file is overwritten.
        info: mapping providing ``name``, ``dims`` and ``dtype``.
        new_data: payload bytes written immediately after the header.
    """
    with open(filepath, 'wb') as sink:
        encoded_name = info['name'].encode('utf-8')
        shape = info['dims']
        header_parts = [
            struct.pack('<I', len(encoded_name)),
            encoded_name,
            struct.pack('<I', len(shape)),
        ]
        header_parts.extend(struct.pack('<Q', extent) for extent in shape)
        header_parts.append(struct.pack('<I', info['dtype']))
        sink.write(b''.join(header_parts))
        sink.write(new_data)
def tensor_to_float32(data, dtype):
    """Decode a raw tensor payload into a fresh, writable float32 array.

    Args:
        data: payload bytes.
        dtype: storage code; 0 is read as float32, 1 as float16. Any other
            code is treated as quantized data and each raw byte becomes one
            float32 sample (the byte value, 0-255).

    Returns:
        1-D ``np.float32`` array that does not share memory with ``data``.
    """
    storage_by_code = {0: np.float32, 1: np.float16}
    storage = storage_by_code.get(dtype, np.uint8)
    # astype() copies by default, so the result never aliases the
    # read-only buffer returned by frombuffer().
    return np.frombuffer(data, dtype=storage).astype(np.float32)
def float32_to_tensor(values, dtype, original_data):
    """Encode a float array back into the payload format given by ``dtype``.

    Args:
        values: NumPy array of samples to encode.
        dtype: storage code; 0 writes float32, 1 writes float16, anything
            else writes one uint8 per sample (the quantized/raw-byte path).
        original_data: unused; kept so existing callers keep working.

    Returns:
        bytes holding the encoded samples.
    """
    if dtype == 0:  # F32
        return values.astype(np.float32).tobytes()
    if dtype == 1:  # F16
        return values.astype(np.float16).tobytes()
    # Quantized: samples are byte values. Round to the nearest integer before
    # casting; a bare float->uint8 cast truncates toward zero, which would
    # bias every reconstructed byte downward (e.g. 254.7 -> 254).
    return np.clip(np.rint(values), 0, 255).astype(np.uint8).tobytes()
def compute_theta(values):
    """Score a 1-D array on a heuristic "theta" scale, in degrees.

    Three statistics are each mapped to a tiered score, summed, divided by
    3.0 and multiplied by 90:
      * normalized histogram entropy (Shannon entropy / log2 of bin count),
      * absolute excess kurtosis,
      * coefficient of variation of the gaps between the sorted first
        (up to) 1000 samples.

    The largest tier values are 0.8, 1.0 and 1.0, so the highest value this
    function can return is 2.8 / 3 * 90 = 84 degrees.

    Returns:
        float; 0.0 when there are fewer than 10 samples or the standard
        deviation is below 1e-10.
    """
    def _tier(measure, ladder, floor):
        # First rung whose threshold is strictly exceeded wins.
        for threshold, points in ladder:
            if measure > threshold:
                return points
        return floor

    count = len(values)
    if count < 10:
        return 0.0

    center = np.mean(values)
    spread = np.std(values)
    if spread < 1e-10:
        return 0.0

    # Excess kurtosis: fourth standardized moment minus 3.
    standardized = (values - center) / spread
    excess_kurtosis = float(np.mean(standardized ** 4) - 3)

    # Shannon entropy of a histogram with 10..100 bins, relative to its maximum.
    bin_count = min(100, max(10, count // 100))
    counts, _edges = np.histogram(values, bins=bin_count)
    occupied = counts[counts > 0] / count
    shannon = float(-np.sum(occupied * np.log2(occupied)))
    ceiling = math.log2(bin_count)
    relative_entropy = shannon / ceiling if ceiling > 0 else 0

    # Variation of the spacing between neighbouring sorted samples.
    gaps = np.diff(np.sort(values[:min(1000, count)]))
    variation = 0
    if len(gaps) > 0:
        gap_mean = np.mean(gaps)
        gap_std = np.std(gaps)
        if gap_mean > 1e-10:
            variation = float(gap_std / gap_mean)

    # Accumulate in the fixed order entropy -> kurtosis -> variation.
    score = 0
    score += _tier(relative_entropy, ((0.95, 0), (0.7, 0.3), (0.3, 0.8)), 0.5)
    score += _tier(abs(excess_kurtosis), ((10, 1.0), (3, 0.7), (1, 0.4)), 0.1)
    score += _tier(variation, ((2, 1.0), (1, 0.7), (0.5, 0.4)), 0.1)

    return (score / 3.0) * 90.0
def purify_organ(values, preserve_ratio=None):
    """Filter a 1-D tensor by keeping its highest-energy spectral components.

    Steps:
      1. Real FFT of ``values``.
      2. Rank frequency bins by magnitude, largest first.
      3. Keep (weight 1.0) the smallest leading set of bins whose cumulative
         energy reaches ``preserve_ratio`` of the total.
      4. Give the next ``max(5, cutoff // 10)`` bins linearly decreasing
         weights; the first of them still gets weight 1.0. All remaining
         bins are zeroed.
      5. Inverse FFT, then rescale so the result has the same mean and
         standard deviation as the input.

    Selection is by magnitude only, so this is not a low-pass filter.

    Args:
        values: 1-D NumPy array.
        preserve_ratio: fraction of spectral energy kept at full weight.
            ``None`` (the default) reads the module-level ``PRESERVE_RATIO``
            at call time, so a value assigned to that global after import
            (as ``main()`` does for ``--preserve``) is honoured.

    Returns:
        Array with the length and dtype of ``values``. The input object
        itself is returned unchanged when it has fewer than 32 samples or
        its total spectral energy is below 1e-10.
    """
    if preserve_ratio is None:
        # Looked up per call: a default written in the signature would be
        # frozen at definition time and ignore later changes to the global.
        preserve_ratio = PRESERVE_RATIO

    n = len(values)
    if n < 32:
        return values  # Too small to purify

    spectrum = np.fft.rfft(values)
    magnitudes = np.abs(spectrum)
    energies = magnitudes ** 2
    total_energy = np.sum(energies)
    if total_energy < 1e-10:
        return values

    # Bin indices ordered from strongest to weakest component.
    order = np.argsort(magnitudes)[::-1]

    # Smallest prefix of that ordering that reaches the requested energy share.
    captured = np.cumsum(energies[order]) / total_energy
    reached = captured >= preserve_ratio
    if reached.any():
        cutoff = int(np.argmax(reached)) + 1
    else:
        cutoff = len(order)

    mask = np.zeros(len(spectrum))
    mask[order[:cutoff]] = 1.0

    # Soft edge: a linear ramp over the next-strongest bins instead of a
    # hard cut. The ramp starts at 1.0 and never reaches 0 inside its span.
    fade_len = max(5, cutoff // 10)
    fade_stop = min(cutoff + fade_len, len(order))
    if fade_stop > cutoff:
        mask[order[cutoff:fade_stop]] = 1.0 - np.arange(fade_stop - cutoff) / fade_len

    purified = np.fft.irfft(spectrum * mask, n=n)

    # Restore the input's mean and standard deviation.
    orig_mean = np.mean(values)
    orig_std = np.std(values)
    pure_std = np.std(purified)
    if pure_std > 1e-10:
        purified = (purified - np.mean(purified)) / pure_std * orig_std + orig_mean
    return purified.astype(values.dtype)
def purify_model(organ_dir, output_dir, verbose=False, preserve_ratio=None):
    """Purify every organ ``.bin`` file of a model into a new directory.

    If ``organ_dir`` contains ``manifest.json`` it is copied to
    ``output_dir`` with ``purified`` set to True and ``_PURE`` appended to
    its ``model`` value. Then, for each of the sub-directories ``skeleton``,
    ``organs``, ``embed``, ``norm``, ``adapters`` and ``unknown`` that
    exists, every ``*.bin`` file is read, scored with ``compute_theta``,
    filtered with ``purify_organ``, scored again and written under the same
    relative name in ``output_dir``.

    Args:
        organ_dir: directory holding the source organs.
        output_dir: destination directory (created if missing).
        verbose: print one line per file with theta before/after.
        preserve_ratio: energy ratio forwarded to ``purify_organ``. ``None``
            (the default) uses the module-level ``PRESERVE_RATIO`` as it is
            when this function is called.

    Returns:
        dict with ``files`` (count), ``avg_theta_before``,
        ``avg_theta_after``, ``avg_improvement`` (each rounded to one
        decimal) and ``output`` (``str(output_dir)``).
    """
    # Read the global at call time and pass it explicitly, so the value
    # main() sets from --preserve actually reaches purify_organ.
    ratio = PRESERVE_RATIO if preserve_ratio is None else preserve_ratio

    organ_path = Path(organ_dir)
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # Copy manifest, tagging it as purified. Context managers make sure
    # both handles are closed (and the output flushed) right away.
    manifest_src = organ_path / 'manifest.json'
    if manifest_src.exists():
        with open(manifest_src, encoding='utf-8') as src:
            manifest = json.load(src)
        manifest['purified'] = True
        manifest['model'] = manifest.get('model', 'unknown') + '_PURE'
        with open(out_path / 'manifest.json', 'w', encoding='utf-8') as dst:
            json.dump(manifest, dst, indent=2)

    # Process each organ category
    categories = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    total_before = 0
    total_after = 0
    total_files = 0
    improvements = []

    for cat in categories:
        cat_src = organ_path / cat
        cat_dst = out_path / cat
        if not cat_src.exists():
            continue
        cat_dst.mkdir(parents=True, exist_ok=True)

        for bf in sorted(cat_src.glob('*.bin')):
            info = read_organ_binary(bf)
            values = tensor_to_float32(info['data'], info['dtype'])

            theta_before = compute_theta(values)
            purified = purify_organ(values, preserve_ratio=ratio)
            theta_after = compute_theta(purified)

            # Convert back to original format
            new_data = float32_to_tensor(purified, info['dtype'], info['data'])

            # The payload must keep its exact byte length; if it does not,
            # fall back to the untouched original and report no change.
            if len(new_data) != len(info['data']):
                new_data = info['data']
                theta_after = theta_before

            write_organ_binary(cat_dst / bf.name, info, new_data)

            delta = theta_after - theta_before
            total_before += theta_before
            total_after += theta_after
            total_files += 1
            improvements.append(delta)

            if verbose:
                # Distinct glyphs for improved / unchanged / degraded; the
                # first and last were previously both empty strings.
                marker = "↑" if delta > 0.5 else "=" if delta > -0.5 else "↓"
                print(f" {marker} {cat}/{bf.name[:40]:40s} θ: {theta_before:5.1f}° → {theta_after:5.1f}° ({delta:+.1f}°)")

    avg_before = total_before / total_files if total_files > 0 else 0
    avg_after = total_after / total_files if total_files > 0 else 0
    avg_improvement = sum(improvements) / len(improvements) if improvements else 0
    return {
        'files': total_files,
        'avg_theta_before': round(avg_before, 1),
        'avg_theta_after': round(avg_after, 1),
        'avg_improvement': round(avg_improvement, 1),
        'output': str(output_dir)
    }
def main():
    """Command-line entry point: parse options, run purify_model, print a summary.

    Options: ``--input/-i`` and ``--output/-o`` (both required),
    ``--preserve/-p`` (float, default 0.85) and ``--verbose/-v``.
    The ``--preserve`` value is stored in the module-level PRESERVE_RATIO
    before the model is processed.
    """
    import argparse

    global PRESERVE_RATIO

    cli = argparse.ArgumentParser(
        description='Organ Purifier — Z = i — Remove noise, keep pure signal',
        epilog='Z = dI/d(log s) · exp(iθ), θ=90° — Signature 935',
    )
    cli.add_argument('--input', '-i', required=True, help='Input organs directory')
    cli.add_argument('--output', '-o', required=True, help='Output pure organs directory')
    cli.add_argument(
        '--preserve', '-p', type=float, default=0.85,
        help='Energy preservation ratio (default: 0.85)',
    )
    cli.add_argument('--verbose', '-v', action='store_true')
    opts = cli.parse_args()

    # Publish the requested ratio through the module-level setting.
    PRESERVE_RATIO = opts.preserve

    rule = '=' * 60

    # Opening banner
    print(rule)
    print(" ORGAN PURIFIER — Z = i")
    print(f" Signal preservation: {PRESERVE_RATIO*100:.0f}%")
    print(rule)
    print(f" Input: {opts.input}")
    print(f" Output: {opts.output}")
    print()

    summary = purify_model(opts.input, opts.output, opts.verbose)

    # Closing report
    print('\n' + rule)
    print(" PURIFICATION COMPLETE")
    print(rule)
    print(f" Files purified: {summary['files']}")
    print(f" θ before: {summary['avg_theta_before']:.1f}°")
    print(f" θ after: {summary['avg_theta_after']:.1f}°")
    print(f" Avg improvement: {summary['avg_improvement']:+.1f}°")
    print(f" Output: {summary['output']}")
    print(" Signature: 935")
    print(rule)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: d3ab5384c880f7e88fb7cdad4b2f9f56089ada8395d0013f5bd3b09d7ab631e8
# SIG-ED25519: /rkXFm2tGuoAS61oxWZVlcTghUuGL8HJ11XRSaI4Ak+eEt54uo+3NETX2+5S8HAq72k6whQmbPI3f4jD8sF/CA==
# VERIFY: python3 verify_authorship.py organ_purify.py