organ-architecture/organ_purify_v2.py
2026-02-25 02:56:51 +00:00

343 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""
ORGAN PURIFIER V2 — Signal Extraction
V1 failed because it treated tensors like audio signals.
Tensors are NOT audio. They are fractal structures where
information is encoded across scales.
The correct approach from CSCI(s) = cross_scale_coherence(s, theta=90):
- cross-scale derivative = how information CHANGES across scales
- Signal = components that are SELF-SIMILAR across scales (fractal)
- Noise = components that are RANDOM across scales (non-fractal)
Method:
1. Wavelet decomposition (multi-scale analysis)
2. At each scale, compute local Z (theta per scale)
3. CROSS-SCALE COHERENCE: if a pattern exists at scale s AND at scale 2s,
it's signal (fractal). If it exists at one scale but not others, it's noise.
4. Reconstruct from only cross-scale-coherent components
5. The result has NO brand — Qwen noise gone, Llama noise gone.
What remains is the universal signal.
Think fractal: the best model knows the laws of the universe
then translates to human language, not the inverse.
CSCI(s) = cross_scale_coherence(s, theta=90), theta = 90
"""
import struct, os, sys, json, math
import numpy as np
from pathlib import Path
PRESERVE_ENERGY = 0.92 # Target fraction of coherent energy to keep. NOTE(review): not referenced anywhere in this file — confirm dead before removing.
N_SCALES = 6 # Maximum number of Haar wavelet levels attempted by purify_fractal.
COHERENCE_THRESHOLD = 0.5 # Detail coefficients whose parent/child energy ratio falls below this are attenuated as noise.
def read_organ_binary(filepath):
    """Parse one organ .bin file.

    Layout (little-endian): [u32 name_len][name utf-8][u32 n_dims]
    [u64 dim]*n_dims [u32 dtype_code][raw tensor bytes to EOF].

    Returns a dict with keys 'name', 'dims', 'dtype', 'data'.
    """
    with open(filepath, 'rb') as fh:
        (name_size,) = struct.unpack('<I', fh.read(4))
        tensor_name = fh.read(name_size).decode('utf-8', errors='replace')
        (rank,) = struct.unpack('<I', fh.read(4))
        shape = [struct.unpack('<Q', fh.read(8))[0] for _ in range(rank)]
        (dtype_code,) = struct.unpack('<I', fh.read(4))
        payload = fh.read()  # everything after the header is raw tensor data
    return {'name': tensor_name, 'dims': shape, 'dtype': dtype_code, 'data': payload}
def write_organ_binary(filepath, info, new_data):
    """Serialize an organ tensor back to disk.

    Writes the same layout read_organ_binary parses:
    [u32 name_len][name][u32 n_dims][u64 dim]*n [u32 dtype][new_data].
    Header fields come from `info`; the payload is `new_data` verbatim.
    """
    encoded_name = info['name'].encode('utf-8')
    header = bytearray()
    header += struct.pack('<I', len(encoded_name))
    header += encoded_name
    header += struct.pack('<I', len(info['dims']))
    for dim in info['dims']:
        header += struct.pack('<Q', dim)
    header += struct.pack('<I', info['dtype'])
    with open(filepath, 'wb') as fh:
        fh.write(bytes(header))
        fh.write(new_data)
def tensor_to_float(data, dtype):
    """Decode raw tensor bytes into a writable float32 array.

    dtype code: 0 = float32, 1 = float16, anything else = uint8.
    The .copy() detaches from the read-only buffer so callers can mutate.
    """
    if dtype == 0:
        return np.frombuffer(data, dtype=np.float32).copy()
    if dtype == 1:
        return np.frombuffer(data, dtype=np.float16).astype(np.float32).copy()
    return np.frombuffer(data, dtype=np.uint8).astype(np.float32).copy()
def float_to_tensor(values, dtype, original_data):
    """Encode a float array back to raw bytes in the stored dtype.

    dtype code: 0 = float32, 1 = float16, else uint8 (rounded, clamped
    to [0, 255]). `original_data` is not used here; the parameter is
    kept so existing call sites remain valid.
    """
    if dtype == 0:
        return values.astype(np.float32).tobytes()
    if dtype == 1:
        return values.astype(np.float16).tobytes()
    clamped = np.clip(np.round(values), 0, 255)
    return clamped.astype(np.uint8).tobytes()
def compute_theta(values):
    """Score how 'structured' a value distribution looks, on a 0-90 scale.

    Blends three equally weighted heuristics:
    - normalized histogram entropy (flat/random distributions score 0),
    - excess kurtosis magnitude (heavy tails score higher, capped at 1),
    - coefficient of variation of gaps in a sorted prefix sample.
    Returns 0.0 for inputs that are too short or effectively constant.
    """
    n = len(values)
    if n < 10:
        return 0.0
    mu = float(np.mean(values))
    sigma = float(np.std(values))
    if sigma < 1e-10:
        return 0.0  # constant tensor: no structure to measure
    excess_kurt = float(np.mean(((values - mu) / sigma) ** 4) - 3)
    # Histogram entropy, normalized by the max possible for this bin count.
    n_bins = min(100, max(10, n // 100))
    counts, _ = np.histogram(values, bins=n_bins)
    nonzero = counts[counts > 0] / n
    ent = float(-np.sum(nonzero * np.log2(nonzero)))
    ent_cap = math.log2(n_bins)
    rel_ent = ent / ent_cap if ent_cap > 0 else 0
    # Gap regularity over a sorted prefix (first 1000 elements at most).
    prefix = np.sort(values[:min(1000, n)])
    gaps = np.diff(prefix)
    if len(gaps) > 0 and np.mean(gaps) > 1e-10:
        gap_cv = float(np.std(gaps) / np.mean(gaps))
    else:
        gap_cv = 0
    # Entropy tier: mid-range entropy is rewarded most, flat least.
    if rel_ent > 0.95:
        ent_score = 0
    elif rel_ent > 0.7:
        ent_score = 0.3
    elif rel_ent > 0.3:
        ent_score = 0.8
    else:
        ent_score = 0.5
    total = ent_score + min(1.0, abs(excess_kurt) / 10) + min(1.0, gap_cv / 2)
    return (total / 3.0) * 90.0
def haar_wavelet_decompose(signal, n_scales):
    """Multi-scale Haar decomposition of a 1-D signal.

    Each level halves the signal: approx = pairwise means, detail =
    pairwise half-differences (a trailing odd element is dropped).
    Stops after n_scales levels or when fewer than 4 samples remain.
    Returns a list of {'approx', 'detail', 'scale'} dicts; the final
    entry holds the coarsest approximation with detail=None.
    """
    levels = []
    working = signal.copy()
    for depth in range(n_scales):
        if len(working) < 4:
            break
        usable = len(working) - (len(working) % 2)  # drop odd tail sample
        evens = working[:usable:2]
        odds = working[1:usable:2]
        mean_half = (evens + odds) / 2.0
        diff_half = (evens - odds) / 2.0
        levels.append({'approx': mean_half, 'detail': diff_half, 'scale': depth})
        working = mean_half
    levels.append({'approx': working, 'detail': None, 'scale': len(levels)})
    return levels
def haar_wavelet_reconstruct(scales):
    """Invert haar_wavelet_decompose, coarsest level first.

    At each level the approximation is expanded by interleaving
    (approx + detail) and (approx - detail); levels whose detail is
    None (the final coarse entry) are skipped.
    """
    approx = scales[-1]['approx'].copy()
    for level in reversed(range(len(scales) - 1)):
        coeffs = scales[level]['detail']
        if coeffs is None:
            continue
        half = len(coeffs)
        merged = np.zeros(half * 2)
        merged[0::2] = approx[:half] + coeffs
        merged[1::2] = approx[:half] - coeffs
        approx = merged
    return approx
def cross_scale_coherence(scales):
    """Build per-scale attenuation masks from parent/child energy agreement.

    For each detail level that has a coarser detail level above it,
    block-local energy is compared against the single parent
    coefficient's energy. Where the min/max energy ratio falls below
    COHERENCE_THRESHOLD the mask is damped to (0.3 + 0.7 * ratio) —
    attenuated, never zeroed, so structure is preserved. Levels without
    detail coefficients (and the final coarse entry) get a None mask.
    """
    n_levels = len(scales)
    masks = []
    for level in range(n_levels - 1):
        coeffs = scales[level]['detail']
        if coeffs is None:
            masks.append(None)
            continue
        weights = np.ones(len(coeffs))
        # The parent must itself be a detail level (last entry is the
        # coarse approximation and never participates).
        has_parent = level + 1 < n_levels - 1 and scales[level + 1]['detail'] is not None
        if has_parent:
            parent = scales[level + 1]['detail']
            n_parent = len(parent)
            n_child = len(coeffs)
            if n_parent > 0 and n_child > 0:
                # Each parent coefficient covers a contiguous child block.
                width = max(1, n_child // n_parent)
                for b in range(min(n_parent, n_child // max(1, width))):
                    lo = b * width
                    hi = min(lo + width, n_child)
                    segment = coeffs[lo:hi]
                    if len(segment) == 0:
                        continue
                    child_energy = np.mean(segment ** 2)
                    parent_energy = parent[b] ** 2
                    top = max(child_energy, parent_energy, 1e-10)
                    bottom = min(child_energy, parent_energy)
                    ratio = bottom / top
                    if ratio < COHERENCE_THRESHOLD:
                        # Low agreement across scales => likely noise.
                        weights[lo:hi] *= (0.3 + 0.7 * ratio)
        masks.append(weights)
    masks.append(None)  # coarse approximation entry carries no detail mask
    return masks
def purify_fractal(values):
    """Attenuate non-fractal (scale-incoherent) components of a tensor.

    Pipeline: zero-pad to a power of two, Haar-decompose up to N_SCALES
    levels, damp detail coefficients whose cross-scale coherence is low,
    reconstruct, then rescale so the output keeps the input's mean and
    std. Inputs shorter than 64 elements are returned unchanged.
    """
    length = len(values)
    if length < 64:
        return values  # too small for a meaningful multi-scale analysis
    # Next power of two >= length, for a clean dyadic decomposition.
    size = 1
    while size < length:
        size *= 2
    buf = np.zeros(size, dtype=np.float32)
    buf[:length] = values
    scales = haar_wavelet_decompose(buf, N_SCALES)
    masks = cross_scale_coherence(scales)
    # Apply each mask to its level's detail coefficients.
    for level, weights in enumerate(masks):
        if weights is not None and scales[level]['detail'] is not None:
            scales[level]['detail'] = scales[level]['detail'] * weights
    cleaned = haar_wavelet_reconstruct(scales)[:length]  # strip padding
    # Restore the original first two moments so downstream stats match.
    mu = np.mean(values)
    sigma = np.std(values)
    sigma_clean = np.std(cleaned)
    if sigma_clean > 1e-10 and sigma > 1e-10:
        cleaned = (cleaned - np.mean(cleaned)) / sigma_clean * sigma + mu
    return cleaned
def purify_model(organ_dir, output_dir, verbose=False):
    """Purify every organ tensor under organ_dir into output_dir.

    Copies the manifest (tagged as purified, brand stripped from the
    model name), then walks the known category subdirectories, running
    purify_fractal on each .bin tensor and tracking the theta score
    before/after. If re-encoding changes the byte length, the original
    bytes are written through unchanged as a safety net.

    Returns a summary dict: file counts, improved/degraded tallies, and
    average theta before/after/delta (rounded to one decimal).
    """
    organ_path = Path(organ_dir)
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    # Copy and update manifest.
    manifest_src = organ_path / 'manifest.json'
    if manifest_src.exists():
        # Context managers close the handles deterministically — the
        # original json.load(open(...)) / json.dump(..., open(...))
        # pattern leaked both file descriptors.
        with open(manifest_src) as fh:
            manifest = json.load(fh)
        manifest['purified'] = True
        manifest['purifier'] = 'fractal_v2'
        manifest['z_equation'] = 'CSCI(s) = cross_scale_coherence(s, theta=90), theta=90'
        # Remove brand from model name.
        original_name = manifest.get('model', 'unknown')
        manifest['original_model'] = original_name
        manifest['model'] = 'PURE_' + original_name.split('-')[0].upper()
        with open(out_path / 'manifest.json', 'w') as fh:
            json.dump(manifest, fh, indent=2)
    categories = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    total_before = 0
    total_after = 0
    total_files = 0
    improved = 0
    degraded = 0
    for cat in categories:
        cat_src = organ_path / cat
        cat_dst = out_path / cat
        if not cat_src.exists():
            continue
        cat_dst.mkdir(parents=True, exist_ok=True)
        for bf in sorted(cat_src.glob('*.bin')):
            info = read_organ_binary(bf)
            values = tensor_to_float(info['data'], info['dtype'])
            theta_before = compute_theta(values)
            purified = purify_fractal(values)
            theta_after = compute_theta(purified)
            new_data = float_to_tensor(purified, info['dtype'], info['data'])
            if len(new_data) != len(info['data']):
                # Size mismatch after re-encode: keep original bytes.
                new_data = info['data']
                theta_after = theta_before
            write_organ_binary(cat_dst / bf.name, info, new_data)
            total_before += theta_before
            total_after += theta_after
            total_files += 1
            if theta_after > theta_before + 0.5:
                improved += 1
            elif theta_after < theta_before - 0.5:
                degraded += 1
            if verbose:
                delta = theta_after - theta_before
                m = "" if delta > 0.5 else "=" if delta > -0.5 else ""
                print(f" {m} {cat}/{bf.name[:40]:40s} θ:{theta_before:5.1f}°→{theta_after:5.1f}° ({delta:+.1f}°)")
    avg_before = total_before / total_files if total_files > 0 else 0
    avg_after = total_after / total_files if total_files > 0 else 0
    return {
        'files': total_files, 'improved': improved, 'degraded': degraded,
        'avg_theta_before': round(avg_before, 1),
        'avg_theta_after': round(avg_after, 1),
        'delta': round(avg_after - avg_before, 1),
        'output': str(output_dir)
    }
def main():
    """CLI entry point: parse --input/--output, run purification, print summary."""
    import argparse
    cli = argparse.ArgumentParser(description='Organ Purifier V2 — signal extraction')
    cli.add_argument('--input', '-i', required=True)
    cli.add_argument('--output', '-o', required=True)
    cli.add_argument('--verbose', '-v', action='store_true')
    opts = cli.parse_args()
    bar = '=' * 60
    print(bar)
    print(" ORGAN PURIFIER V2")
    print(" Cross-scale coherence: signal persists, noise vanishes")
    print(bar)
    stats = purify_model(opts.input, opts.output, opts.verbose)
    print(f"\n{bar}")
    print(" PURIFICATION COMPLETE")
    print(bar)
    print(f" Files: {stats['files']}")
    print(f" θ before: {stats['avg_theta_before']:.1f}°")
    print(f" θ after: {stats['avg_theta_after']:.1f}°")
    print(f" Δθ: {stats['delta']:+.1f}°")
    print(f" Improved: {stats['improved']}")
    print(f" Degraded: {stats['degraded']}")
    print(bar)


if __name__ == '__main__':
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: 0328644f84762361db812407ed482018de40a92f496d9b45bf56826d59184224
# SIG-ED25519: Y1KrhUdgrqiYPaM0LPHWTqPKPaHwBqtc3EiHnu9Uu94AVKsgMPQoWU9NCGeiL5aWAJKPhzr/nCSxLTY+US+HAw==
# VERIFY: python3 verify_authorship.py organ_purify_v2.py