# organ-architecture/organ_purify_v2.py
# (repository listing metadata: 345 lines, 13 KiB, Python, executable file)
#!/usr/bin/env python3
"""
ORGAN PURIFIER V2 — Z = i — Fractal Signal Extraction
V1 failed because it treated tensors like audio signals.
Tensors are NOT audio. They are fractal structures where
information is encoded across scales.
The correct approach from CSCI(s) = cross_scale_coherence(s, theta=90):
- cross-scale derivative = how information CHANGES across scales
- Signal = components that are SELF-SIMILAR across scales (fractal)
- Noise = components that are RANDOM across scales (non-fractal)
Method:
1. Wavelet decomposition (multi-scale analysis)
2. At each scale, compute local Z (theta per scale)
3. CROSS-SCALE COHERENCE: if a pattern exists at scale s AND at scale 2s,
it's signal (fractal). If it exists at one scale but not others, it's noise.
4. Reconstruct from only cross-scale-coherent components
5. The result has NO brand — Qwen noise gone, Llama noise gone.
What remains is the universal signal.
Think fractal: the best model knows the laws of the universe
then translates to human language, not the inverse.
CSCI(s) = cross_scale_coherence(s, theta=90), theta = 90
Build v935
"""
import struct, os, sys, json, math
import numpy as np
from pathlib import Path
PRESERVE_ENERGY = 0.92 # Keep 92% of cross-scale-coherent energy (NOTE(review): not referenced anywhere in this file — confirm whether it was meant to feed cross_scale_coherence)
N_SCALES = 6 # Number of wavelet scales to analyze (max depth passed to haar_wavelet_decompose)
COHERENCE_THRESHOLD = 0.5 # Cross-scale coherence threshold below which detail coefficients are attenuated
def read_organ_binary(filepath):
    """Parse one organ .bin file.

    Layout: u32 name length, UTF-8 name, u32 rank, rank * u64 dims,
    u32 dtype code, then the raw tensor payload to EOF (all little-endian).
    Returns a dict with keys 'name', 'dims', 'dtype', 'data'.
    """
    with open(filepath, 'rb') as fh:
        (n_name,) = struct.unpack('<I', fh.read(4))
        tensor_name = fh.read(n_name).decode('utf-8', errors='replace')
        (rank,) = struct.unpack('<I', fh.read(4))
        shape = [struct.unpack('<Q', fh.read(8))[0] for _ in range(rank)]
        (dtype_code,) = struct.unpack('<I', fh.read(4))
        payload = fh.read()
    return {'name': tensor_name, 'dims': shape, 'dtype': dtype_code, 'data': payload}
def write_organ_binary(filepath, info, new_data):
    """Serialize an organ record to disk in the layout read_organ_binary expects.

    `info` supplies the header fields ('name', 'dims', 'dtype'); `new_data`
    is the raw tensor payload appended after the header.
    """
    encoded_name = info['name'].encode('utf-8')
    header = [struct.pack('<I', len(encoded_name)), encoded_name,
              struct.pack('<I', len(info['dims']))]
    header.extend(struct.pack('<Q', d) for d in info['dims'])
    header.append(struct.pack('<I', info['dtype']))
    with open(filepath, 'wb') as fh:
        fh.write(b''.join(header))
        fh.write(new_data)
def tensor_to_float(data, dtype):
    """Decode raw tensor bytes into a writable float32 ndarray.

    dtype code: 0 = float32, 1 = float16, anything else = uint8.
    A copy is returned so callers can mutate freely (frombuffer views
    are read-only).
    """
    if dtype == 0:
        decoded = np.frombuffer(data, dtype=np.float32)
    elif dtype == 1:
        decoded = np.frombuffer(data, dtype=np.float16).astype(np.float32)
    else:
        decoded = np.frombuffer(data, dtype=np.uint8).astype(np.float32)
    return decoded.copy()
def float_to_tensor(values, dtype, original_data):
    """Encode a float array back to raw bytes in the target dtype.

    dtype code: 0 = float32, 1 = float16, anything else = rounded/clamped
    uint8. `original_data` is unused but kept for API parity with callers.
    """
    if dtype == 0:
        return values.astype(np.float32).tobytes()
    if dtype == 1:
        return values.astype(np.float16).tobytes()
    rounded = np.round(values)
    return np.clip(rounded, 0, 255).astype(np.uint8).tobytes()
def compute_theta(values):
    """Heuristic 'structure angle' theta in [0, 90] degrees.

    Combines three equally-weighted cues — low normalized histogram
    entropy, heavy tails (excess kurtosis), and irregular spacing of
    sorted sample gaps — into a single score. Degenerate inputs
    (fewer than 10 elements, or near-zero variance) score 0.0.
    """
    n = len(values)
    if n < 10:
        return 0.0
    mean = float(np.mean(values))
    std = float(np.std(values))
    if std < 1e-10:
        return 0.0
    # excess kurtosis of the standardized values
    kurt = float(np.mean(((values - mean) / std) ** 4) - 3)
    # normalized Shannon entropy of the value histogram
    n_bins = min(100, max(10, n // 100))
    hist, _ = np.histogram(values, bins=n_bins)
    probs = hist[hist > 0] / n
    entropy = float(-np.sum(probs * np.log2(probs)))
    max_ent = math.log2(n_bins)
    norm_ent = entropy / max_ent if max_ent > 0 else 0
    # coefficient of variation of gaps between sorted leading values
    sample = np.sort(values[:min(1000, n)])
    diffs = np.diff(sample)
    cv = float(np.std(diffs) / np.mean(diffs)) if len(diffs) > 0 and np.mean(diffs) > 1e-10 else 0
    if norm_ent > 0.95:
        ent_score = 0.0          # near-maximal entropy looks like noise
    elif norm_ent > 0.7:
        ent_score = 0.3
    elif norm_ent > 0.3:
        ent_score = 0.8          # mid-range entropy scores highest
    else:
        ent_score = 0.5
    total = ent_score + min(1.0, abs(kurt) / 10) + min(1.0, cv / 2)
    return (total / 3.0) * 90.0
def haar_wavelet_decompose(signal, n_scales):
    """Iterative Haar analysis of a 1-D array.

    Returns a list of dicts {'approx', 'detail', 'scale'}, finest scale
    first; the final entry holds the coarsest approximation with
    detail=None. Stops early once fewer than 4 samples remain. For
    odd-length levels the trailing element is dropped from that split.
    """
    levels = []
    work = signal.copy()
    for _ in range(n_scales):
        if len(work) < 4:
            break
        usable = len(work) - (len(work) % 2)
        even = work[:usable:2]
        odd = work[1:usable:2]
        levels.append({'approx': (even + odd) / 2.0,
                       'detail': (even - odd) / 2.0,
                       'scale': len(levels)})
        work = levels[-1]['approx']
    levels.append({'approx': work, 'detail': None, 'scale': len(levels)})
    return levels
def haar_wavelet_reconstruct(scales):
    """Invert haar_wavelet_decompose: merge each detail level back in,
    working from the coarsest approximation toward the finest scale."""
    out = scales[-1]['approx'].copy()
    for level in reversed(scales[:-1]):
        d = level['detail']
        if d is None:
            continue
        m = len(d)
        # interleave sums and differences back into twice the length
        merged = np.zeros(m * 2)
        merged[0::2] = out[:m] + d
        merged[1::2] = out[:m] - d
        out = merged
    return out
def cross_scale_coherence(scales):
    """Build per-scale attenuation masks for detail coefficients.

    Each detail block is compared against the energy of its parent
    coefficient one scale coarser. Blocks whose energy ratio falls below
    COHERENCE_THRESHOLD are damped (never fully zeroed) as cross-scale
    incoherent noise; coherent blocks keep a mask of 1.0. The returned
    list is parallel to `scales`, with None where a scale has no detail.
    """
    masks = []
    last = len(scales) - 1
    for i in range(last):
        detail = scales[i]['detail']
        if detail is None:
            masks.append(None)
            continue
        n_curr = len(detail)
        mask = np.ones(n_curr)
        parent = scales[i + 1]['detail'] if i + 1 < last else None
        if parent is not None:
            n_next = len(parent)
            if n_next > 0 and n_curr > 0:
                # each parent coefficient covers one block of child coefficients
                block_size = max(1, n_curr // n_next)
                n_blocks = min(n_next, n_curr // max(1, block_size))
                for j in range(n_blocks):
                    start = j * block_size
                    end = min(start + block_size, n_curr)
                    seg = detail[start:end]
                    if len(seg) == 0:
                        continue
                    child_energy = np.mean(seg ** 2)
                    parent_energy = parent[j] ** 2
                    # ratio of the smaller to the larger energy, floored
                    # at 1e-10 to avoid division by zero
                    hi = max(child_energy, parent_energy, 1e-10)
                    lo = min(child_energy, parent_energy)
                    coherence = lo / hi
                    if coherence < COHERENCE_THRESHOLD:
                        # attenuate, but keep some amplitude so gross
                        # structure survives
                        mask[start:end] *= (0.3 + 0.7 * coherence)
        masks.append(mask)
    masks.append(None)  # coarsest scale carries no detail
    return masks
def purify_fractal(values):
    """Wavelet-domain denoise of a flat float array.

    Pipeline: zero-pad to a power of two, Haar-decompose to N_SCALES,
    damp detail coefficients that are incoherent across scales, then
    reconstruct and re-impose the original mean and std so downstream
    statistics stay comparable. Arrays shorter than 64 elements are
    returned untouched.
    """
    n = len(values)
    if n < 64:
        return values
    # zero-pad up to the next power of two so every Haar split is even
    size = 1
    while size < n:
        size <<= 1
    buf = np.zeros(size, dtype=np.float32)
    buf[:n] = values
    scales = haar_wavelet_decompose(buf, N_SCALES)
    masks = cross_scale_coherence(scales)
    for level, mask in zip(scales, masks):
        if mask is not None and level['detail'] is not None:
            level['detail'] = level['detail'] * mask
    cleaned = haar_wavelet_reconstruct(scales)[:n]  # drop the padding
    # restore the original first two moments
    mu = np.mean(values)
    sigma = np.std(values)
    sigma_clean = np.std(cleaned)
    if sigma_clean > 1e-10 and sigma > 1e-10:
        cleaned = (cleaned - np.mean(cleaned)) / sigma_clean * sigma + mu
    return cleaned
def purify_model(organ_dir, output_dir, verbose=False):
    """Purify every organ .bin under organ_dir into output_dir.

    Copies manifest.json (re-branded and tagged as purified) when present,
    then walks the known category subdirectories, runs purify_fractal on
    each tensor, and writes the result back in the same binary layout.

    Args:
        organ_dir: directory containing manifest.json and category subdirs.
        output_dir: destination directory (created if missing).
        verbose: print a per-file theta before/after line.
    Returns:
        Summary dict: file count, improved/degraded counts, and average
        theta before/after (rounded to one decimal).
    """
    organ_path = Path(organ_dir)
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    # Copy and update manifest. FIX: the original used json.load(open(...))
    # and json.dump(..., open(..., 'w')), leaking both file handles and
    # relying on GC to flush the write; context managers close them.
    manifest_src = organ_path / 'manifest.json'
    if manifest_src.exists():
        with open(manifest_src, encoding='utf-8') as f:
            manifest = json.load(f)
        manifest['purified'] = True
        manifest['purifier'] = 'fractal_v2'
        manifest['z_equation'] = 'CSCI(s) = cross_scale_coherence(s, theta=90), theta=90'
        # Remove brand from model name
        original_name = manifest.get('model', 'unknown')
        manifest['original_model'] = original_name
        manifest['model'] = 'PURE_' + original_name.split('-')[0].upper()
        with open(out_path / 'manifest.json', 'w', encoding='utf-8') as f:
            json.dump(manifest, f, indent=2)
    categories = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    total_before = 0
    total_after = 0
    total_files = 0
    improved = 0
    degraded = 0
    for cat in categories:
        cat_src = organ_path / cat
        cat_dst = out_path / cat
        if not cat_src.exists():
            continue
        cat_dst.mkdir(parents=True, exist_ok=True)
        for bf in sorted(cat_src.glob('*.bin')):
            info = read_organ_binary(bf)
            values = tensor_to_float(info['data'], info['dtype'])
            theta_before = compute_theta(values)
            purified = purify_fractal(values)
            theta_after = compute_theta(purified)
            new_data = float_to_tensor(purified, info['dtype'], info['data'])
            if len(new_data) != len(info['data']):
                # size mismatch after re-encoding: keep the original payload
                new_data = info['data']
                theta_after = theta_before
            write_organ_binary(cat_dst / bf.name, info, new_data)
            total_before += theta_before
            total_after += theta_after
            total_files += 1
            # count changes only when they exceed 0.5 degrees either way
            if theta_after > theta_before + 0.5:
                improved += 1
            elif theta_after < theta_before - 0.5:
                degraded += 1
            if verbose:
                delta = theta_after - theta_before
                m = "" if delta > 0.5 else "=" if delta > -0.5 else ""
                print(f" {m} {cat}/{bf.name[:40]:40s} θ:{theta_before:5.1f}°→{theta_after:5.1f}° ({delta:+.1f}°)")
    avg_before = total_before / total_files if total_files > 0 else 0
    avg_after = total_after / total_files if total_files > 0 else 0
    return {
        'files': total_files, 'improved': improved, 'degraded': degraded,
        'avg_theta_before': round(avg_before, 1),
        'avg_theta_after': round(avg_after, 1),
        'delta': round(avg_after - avg_before, 1),
        'output': str(output_dir)
    }
def main():
    """CLI entry point: parse --input/--output/--verbose and run purify_model."""
    import argparse
    cli = argparse.ArgumentParser(description='Organ Purifier V2 — Fractal Z=i')
    cli.add_argument('--input', '-i', required=True)
    cli.add_argument('--output', '-o', required=True)
    cli.add_argument('--verbose', '-v', action='store_true')
    opts = cli.parse_args()
    rule = '=' * 60
    print(rule)
    print(" ORGAN PURIFIER V2 — FRACTAL — Z = i")
    print(" Cross-scale coherence: signal persists, noise vanishes")
    print(rule)
    result = purify_model(opts.input, opts.output, opts.verbose)
    print("\n" + rule)
    print(" PURIFICATION COMPLETE")
    print(rule)
    print(f" Files: {result['files']}")
    print(f" θ before: {result['avg_theta_before']:.1f}°")
    print(f" θ after: {result['avg_theta_after']:.1f}°")
    print(f" Δθ: {result['delta']:+.1f}°")
    print(f" Improved: {result['improved']}")
    print(f" Degraded: {result['degraded']}")
    print(f" Signature: 935")
    print(rule)
if __name__ == '__main__':
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: 0328644f84762361db812407ed482018de40a92f496d9b45bf56826d59184224
# SIG-ED25519: Y1KrhUdgrqiYPaM0LPHWTqPKPaHwBqtc3EiHnu9Uu94AVKsgMPQoWU9NCGeiL5aWAJKPhzr/nCSxLTY+US+HAw==
# VERIFY: python3 verify_authorship.py organ_purify_v2.py