#!/usr/bin/env python3
|
|
"""
|
|
ORGAN PURIFIER V2 — Z = i — Fractal Signal Extraction
|
|
|
|
V1 failed because it treated tensors like audio signals.
|
|
Tensors are NOT audio. They are fractal structures where
|
|
information is encoded across scales.
|
|
|
|
The correct approach from Z = dI/d(log s) * exp(i*theta):
|
|
- dI/d(log s) = how information CHANGES across scales
|
|
- Signal = components that are SELF-SIMILAR across scales (fractal)
|
|
- Noise = components that are RANDOM across scales (non-fractal)
|
|
|
|
Method:
|
|
1. Wavelet decomposition (multi-scale analysis)
|
|
2. At each scale, compute local Z (theta per scale)
|
|
3. CROSS-SCALE COHERENCE: if a pattern exists at scale s AND at scale 2s,
|
|
it's signal (fractal). If it exists at one scale but not others, it's noise.
|
|
4. Reconstruct from only cross-scale-coherent components
|
|
5. The result has NO brand — Qwen noise gone, Llama noise gone.
|
|
What remains is the universal signal.
|
|
|
|
Think fractal: the best model knows the laws of the universe
|
|
then translates to human language, not the inverse.
|
|
|
|
Z = dI/d(log s) * exp(i*theta), theta = 90
|
|
Signature 935
|
|
"""
|
|
|
|
import struct, os, sys, json, math
|
|
import numpy as np
|
|
from pathlib import Path
|
|
|
|
# Tunable parameters for the fractal purification pass.
PRESERVE_ENERGY = 0.92  # Target fraction of cross-scale-coherent energy to keep.
                        # NOTE(review): not referenced anywhere in this file — confirm external use.
N_SCALES = 6  # Maximum number of Haar wavelet decomposition levels.
COHERENCE_THRESHOLD = 0.5  # Below this parent/child energy ratio a detail block is treated as noise.
|
|
|
|
def read_organ_binary(filepath):
    """Parse one organ .bin container.

    Layout (little-endian): u32 name length, utf-8 name, u32 dim count,
    that many u64 dims, u32 dtype tag, then the raw payload to EOF.
    Returns a dict with keys 'name', 'dims', 'dtype', 'data'.
    """
    with open(filepath, 'rb') as fh:
        (name_len,) = struct.unpack('<I', fh.read(4))
        tensor_name = fh.read(name_len).decode('utf-8', errors='replace')
        (n_dims,) = struct.unpack('<I', fh.read(4))
        dims = list(struct.unpack('<%dQ' % n_dims, fh.read(8 * n_dims)))
        (dtype_tag,) = struct.unpack('<I', fh.read(4))
        payload = fh.read()
    return {'name': tensor_name, 'dims': dims, 'dtype': dtype_tag, 'data': payload}
|
|
|
|
def write_organ_binary(filepath, info, new_data):
    """Serialize an organ record back to disk in the length-prefixed layout.

    `info` supplies the header fields ('name', 'dims', 'dtype');
    `new_data` is the raw payload written after the header.
    """
    encoded_name = info['name'].encode('utf-8')
    header = [
        struct.pack('<I', len(encoded_name)),
        encoded_name,
        struct.pack('<I', len(info['dims'])),
    ]
    header.extend(struct.pack('<Q', d) for d in info['dims'])
    header.append(struct.pack('<I', info['dtype']))
    with open(filepath, 'wb') as fh:
        fh.write(b''.join(header))
        fh.write(new_data)
|
|
|
|
def tensor_to_float(data, dtype):
    """Decode raw tensor bytes into a writable float32 array.

    dtype tag: 0 = float32, 1 = float16 (upcast), anything else = uint8.
    A copy is returned so callers may mutate the result.
    """
    if dtype == 0:
        return np.frombuffer(data, dtype=np.float32).copy()
    if dtype == 1:
        return np.frombuffer(data, dtype=np.float16).astype(np.float32).copy()
    return np.frombuffer(data, dtype=np.uint8).astype(np.float32).copy()
|
|
|
|
def float_to_tensor(values, dtype, original_data):
    """Encode a float array back to raw bytes in the target dtype.

    dtype tag: 0 = float32, 1 = float16, anything else = uint8
    (rounded and clipped to [0, 255]). `original_data` is accepted for
    interface compatibility with callers but is not consulted here.
    """
    if dtype == 0:
        return values.astype(np.float32).tobytes()
    if dtype == 1:
        return values.astype(np.float16).tobytes()
    clipped = np.clip(np.round(values), 0, 255)
    return clipped.astype(np.uint8).tobytes()
|
|
|
|
def compute_theta(values):
    """Heuristic 'structure angle' in degrees, 0..90.

    0 means the array looks like featureless noise; 90 means strongly
    structured. Combines three cues, each contributing up to 1 point:
    normalized histogram entropy, excess kurtosis, and the coefficient
    of variation of gaps between sorted samples. Arrays shorter than 10
    elements or with near-zero variance score 0.
    """
    n = len(values)
    if n < 10:
        return 0.0
    mu = float(np.mean(values))
    sigma = float(np.std(values))
    if sigma < 1e-10:
        return 0.0  # a constant tensor carries no structure signal

    excess_kurtosis = float(np.mean(((values - mu) / sigma) ** 4) - 3)

    bin_count = min(100, max(10, n // 100))
    counts, _ = np.histogram(values, bins=bin_count)
    p = counts[counts > 0] / n
    shannon = float(-np.sum(p * np.log2(p)))
    ent_ceiling = math.log2(bin_count)
    relative_entropy = shannon / ent_ceiling if ent_ceiling > 0 else 0

    # Spacing statistics on (up to) the first 1000 values, sorted.
    ordered = np.sort(values[:min(1000, n)])
    gaps = np.diff(ordered)
    have_gaps = len(gaps) > 0
    spacing_cv = (float(np.std(gaps) / np.mean(gaps))
                  if have_gaps and np.mean(gaps) > 1e-10 else 0)

    # Near-maximal entropy reads as noise (0); mid-range entropy as structure.
    if relative_entropy > 0.95:
        ent_score = 0
    elif relative_entropy > 0.7:
        ent_score = 0.3
    elif relative_entropy > 0.3:
        ent_score = 0.8
    else:
        ent_score = 0.5
    total = ent_score
    total += min(1.0, abs(excess_kurtosis) / 10)
    total += min(1.0, spacing_cv / 2)
    return (total / 3.0) * 90.0
|
|
|
|
def haar_wavelet_decompose(signal, n_scales):
    """Iterative Haar analysis of a 1-D signal.

    Each level stores pairwise averages ('approx') and half-differences
    ('detail'); the approximation feeds the next, coarser level. Stops
    after `n_scales` levels or when fewer than 4 samples remain. A final
    entry holds the coarsest approximation with detail None. When a
    level has odd length its trailing sample is dropped.
    """
    levels = []
    work = signal.copy()

    for level in range(n_scales):
        m = len(work)
        if m < 4:
            break
        even_len = m - (m % 2)  # ignore a trailing odd sample at this level
        left = work[:even_len:2]
        right = work[1:even_len:2]
        half_sum = (left + right) / 2.0
        levels.append({'approx': half_sum,
                       'detail': (left - right) / 2.0,
                       'scale': level})
        work = half_sum

    levels.append({'approx': work, 'detail': None, 'scale': len(levels)})
    return levels
|
|
|
|
def haar_wavelet_reconstruct(scales):
    """Invert haar_wavelet_decompose.

    Starting from the coarsest approximation, walks outward through the
    levels, re-interleaving (approx + detail) and (approx - detail) into
    even/odd positions. Levels whose detail is None are skipped.
    """
    out = scales[-1]['approx'].copy()

    for level in reversed(scales[:-1]):
        det = level['detail']
        if det is None:
            continue
        m = len(det)
        merged = np.empty(m * 2)
        merged[0::2] = out[:m] + det
        merged[1::2] = out[:m] - det
        out = merged

    return out
|
|
|
|
def cross_scale_coherence(scales):
    """Build per-level attenuation masks for detail coefficients.

    A block of child detail coefficients is deemed 'signal' when its
    mean energy co-occurs with energy in the corresponding parent
    coefficient one level coarser (fractal self-similarity). Blocks
    whose min/max energy ratio falls below COHERENCE_THRESHOLD are
    attenuated (never fully zeroed, to preserve structure).

    Returns one entry per element of `scales`: a weight array matching
    that level's detail length, or None where the level has no detail
    (including the final approx-only entry).
    """
    masks = []

    for idx in range(len(scales) - 1):
        det = scales[idx]['detail']
        if det is None:
            masks.append(None)
            continue

        weight = np.ones(len(det))

        has_parent = (idx + 1 < len(scales) - 1
                      and scales[idx + 1]['detail'] is not None)
        if has_parent:
            parent = scales[idx + 1]['detail']
            n_parent = len(parent)
            n_child = len(det)

            if n_parent > 0 and n_child > 0:
                # One parent coefficient covers a contiguous block of
                # child coefficients.
                block = max(1, n_child // n_parent)

                for j in range(min(n_parent, n_child // max(1, block))):
                    lo = j * block
                    hi = min(lo + block, n_child)
                    child_block = det[lo:hi]
                    if len(child_block) == 0:
                        continue

                    child_energy = np.mean(child_block ** 2)
                    parent_energy = parent[j] ** 2

                    # Coherence = min/max energy ratio: both scales
                    # energetic -> signal; only one -> noise.
                    hi_e = max(child_energy, parent_energy, 1e-10)
                    lo_e = min(child_energy, parent_energy)
                    coherence = lo_e / hi_e

                    if coherence < COHERENCE_THRESHOLD:
                        # Low cross-scale coherence: attenuate, don't zero.
                        weight[lo:hi] *= (0.3 + 0.7 * coherence)

        masks.append(weight)

    masks.append(None)  # coarsest entry carries no detail
    return masks
|
|
|
|
def purify_fractal(values):
    """Keep only the cross-scale-coherent wavelet components of `values`.

    Pipeline: zero-pad to the next power of two, Haar-decompose over
    N_SCALES levels, attenuate detail coefficients that lack cross-scale
    coherence, reconstruct, trim the padding, then rescale so the result
    matches the original mean and standard deviation. Arrays shorter
    than 64 samples are returned untouched.
    """
    n = len(values)
    if n < 64:
        return values  # too small for meaningful multi-scale analysis

    # Next power of two >= n so every Haar level halves cleanly.
    target = 1
    while target < n:
        target <<= 1
    buf = np.zeros(target, dtype=np.float32)
    buf[:n] = values

    levels = haar_wavelet_decompose(buf, N_SCALES)
    weights = cross_scale_coherence(levels)

    # Attenuate incoherent detail coefficients level by level.
    for lvl, w in zip(levels, weights):
        if w is not None and lvl['detail'] is not None:
            lvl['detail'] = lvl['detail'] * w

    cleaned = haar_wavelet_reconstruct(levels)[:n]

    # Restore the original first/second moments so downstream statistics
    # (and any quantized re-encoding) stay comparable.
    src_mean = np.mean(values)
    src_std = np.std(values)
    out_std = np.std(cleaned)

    if out_std > 1e-10 and src_std > 1e-10:
        cleaned = (cleaned - np.mean(cleaned)) / out_std * src_std + src_mean

    return cleaned
|
|
|
|
def purify_model(organ_dir, output_dir, verbose=False):
    """Purify every organ tensor under `organ_dir` into `output_dir`.

    Copies and annotates manifest.json (stripping the brand prefix from
    the model name), then walks the known category subdirectories and
    runs purify_fractal on each .bin tensor. If re-encoding changes the
    payload size the original bytes are kept instead, so the container
    stays valid.

    Fix: the original opened manifest files via bare open() inside
    json.load/json.dump and never closed them; read_text/write_text
    close deterministically and guarantee the output is flushed.

    Returns a summary dict: file count, improved/degraded counts,
    average theta before/after, delta, and the output path.
    """
    organ_path = Path(organ_dir)
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # Copy and update manifest
    manifest_src = organ_path / 'manifest.json'
    if manifest_src.exists():
        manifest = json.loads(manifest_src.read_text())
        manifest['purified'] = True
        manifest['purifier'] = 'fractal_v2'
        manifest['z_equation'] = 'Z = dI/d(log s) * exp(i*theta), theta=90'
        # Remove brand from model name
        original_name = manifest.get('model', 'unknown')
        manifest['original_model'] = original_name
        manifest['model'] = 'PURE_' + original_name.split('-')[0].upper()
        (out_path / 'manifest.json').write_text(json.dumps(manifest, indent=2))

    categories = ['skeleton', 'organs', 'embed', 'norm', 'adapters', 'unknown']
    total_before = 0
    total_after = 0
    total_files = 0
    improved = 0
    degraded = 0

    for cat in categories:
        cat_src = organ_path / cat
        cat_dst = out_path / cat
        if not cat_src.exists():
            continue
        cat_dst.mkdir(parents=True, exist_ok=True)

        for bf in sorted(cat_src.glob('*.bin')):
            info = read_organ_binary(bf)
            values = tensor_to_float(info['data'], info['dtype'])

            theta_before = compute_theta(values)
            purified = purify_fractal(values)
            theta_after = compute_theta(purified)

            new_data = float_to_tensor(purified, info['dtype'], info['data'])
            if len(new_data) != len(info['data']):
                # A size mismatch would corrupt the container: fall back
                # to the original payload and report no change.
                new_data = info['data']
                theta_after = theta_before

            write_organ_binary(cat_dst / bf.name, info, new_data)

            total_before += theta_before
            total_after += theta_after
            total_files += 1
            # 0.5 degree dead band so float jitter isn't counted either way.
            if theta_after > theta_before + 0.5:
                improved += 1
            elif theta_after < theta_before - 0.5:
                degraded += 1

            if verbose:
                delta = theta_after - theta_before
                m = "↑" if delta > 0.5 else "=" if delta > -0.5 else "↓"
                print(f" {m} {cat}/{bf.name[:40]:40s} θ:{theta_before:5.1f}°→{theta_after:5.1f}° ({delta:+.1f}°)")

    avg_before = total_before / total_files if total_files > 0 else 0
    avg_after = total_after / total_files if total_files > 0 else 0

    return {
        'files': total_files, 'improved': improved, 'degraded': degraded,
        'avg_theta_before': round(avg_before, 1),
        'avg_theta_after': round(avg_after, 1),
        'delta': round(avg_after - avg_before, 1),
        'output': str(output_dir)
    }
|
|
|
|
def main():
    """CLI entry point: parse arguments, run purification, print a summary."""
    import argparse
    parser = argparse.ArgumentParser(description='Organ Purifier V2 — Fractal Z=i')
    parser.add_argument('--input', '-i', required=True)
    parser.add_argument('--output', '-o', required=True)
    parser.add_argument('--verbose', '-v', action='store_true')
    opts = parser.parse_args()

    rule = '=' * 60
    print(rule)
    print(" ORGAN PURIFIER V2 — FRACTAL — Z = i")
    print(" Cross-scale coherence: signal persists, noise vanishes")
    print(rule)

    summary = purify_model(opts.input, opts.output, opts.verbose)

    print(f"\n{rule}")
    print(" PURIFICATION COMPLETE")
    print(rule)
    print(f" Files: {summary['files']}")
    print(f" θ before: {summary['avg_theta_before']:.1f}°")
    print(f" θ after: {summary['avg_theta_after']:.1f}°")
    print(f" Δθ: {summary['delta']:+.1f}°")
    print(f" Improved: {summary['improved']}")
    print(f" Degraded: {summary['degraded']}")
    print(f" Signature: 935")
    print(rule)


if __name__ == '__main__':
    main()
|