organ-architecture/organ_assemble.py
2026-02-25 02:56:51 +00:00

239 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""
Assemble a GGUF model from extracted/grafted organs.
Takes a manifest + organ files → produces a working GGUF.
The reverse of organ_extract.py.
"""
import struct
import os
import sys
import json
import argparse
from pathlib import Path
# GGUF file magic. Packed as a little-endian u32 (as the header writer does)
# this value yields the four bytes b"GGUF".
GGUF_MAGIC = 0x46554747
def write_string(f, s):
    """Emit *s* to *f* in GGUF string form.

    The UTF-8 encoding of *s* is written after its byte length, which is
    packed as a little-endian unsigned 64-bit integer.
    """
    payload = s.encode('utf-8')
    length_prefix = struct.pack('<Q', len(payload))
    f.write(length_prefix)
    f.write(payload)
# GGUF metadata value type tags used by this writer.
_GGUF_UINT32 = 4
_GGUF_INT32 = 5
_GGUF_FLOAT32 = 6
_GGUF_BOOL = 7
_GGUF_STRING = 8
_GGUF_ARRAY = 9
_GGUF_UINT64 = 10
_GGUF_INT64 = 11


def _pack_gguf_string(s):
    """Return *s* as GGUF string bytes: u64 byte length + UTF-8 data."""
    encoded = s.encode('utf-8')
    return struct.pack('<Q', len(encoded)) + encoded


def _encode_gguf_array(values):
    """Return element type tag + u64 count + packed elements for *values*.

    Raises TypeError when the elements are not all strings or all numbers.
    """
    count = struct.pack('<Q', len(values))
    if not values:
        # No elements to inspect; an empty array is tagged as uint32.
        return struct.pack('<I', _GGUF_UINT32) + count

    if all(isinstance(v, str) for v in values):
        body = b''.join(_pack_gguf_string(v) for v in values)
        return struct.pack('<I', _GGUF_STRING) + count + body

    # bool is a subclass of int, so it has to be recognised before int.
    if all(isinstance(v, bool) for v in values):
        body = bytes(1 if v else 0 for v in values)
        return struct.pack('<I', _GGUF_BOOL) + count + body

    if all(isinstance(v, int) for v in values):
        lowest, highest = min(values), max(values)
        if -0x80000000 <= lowest and highest <= 0x7FFFFFFF:
            tag, code = _GGUF_INT32, 'i'
        elif lowest >= 0:
            # Too large for int32 but non-negative: use an unsigned type.
            if highest <= 0xFFFFFFFF:
                tag, code = _GGUF_UINT32, 'I'
            else:
                tag, code = _GGUF_UINT64, 'Q'
        else:
            tag, code = _GGUF_INT64, 'q'
        body = struct.pack(f'<{len(values)}{code}', *values)
        return struct.pack('<I', tag) + count + body

    if all(isinstance(v, (int, float)) for v in values):
        # Mixed int/float (or all float) lists are stored as float32.
        body = struct.pack(f'<{len(values)}f', *values)
        return struct.pack('<I', _GGUF_FLOAT32) + count + body

    raise TypeError(
        "unsupported GGUF array: elements must be all str or all numbers"
    )


def _encode_gguf_value(value):
    """Return the type tag and payload bytes for one metadata value.

    Raises TypeError for values that have no GGUF representation here.
    """
    # bool is a subclass of int, so it has to be tested first.
    if isinstance(value, bool):
        return struct.pack('<IB', _GGUF_BOOL, 1 if value else 0)
    if isinstance(value, int):
        if value < 0:
            if value >= -0x80000000:
                return struct.pack('<Ii', _GGUF_INT32, value)
            return struct.pack('<Iq', _GGUF_INT64, value)
        if value <= 0xFFFFFFFF:
            return struct.pack('<II', _GGUF_UINT32, value)
        return struct.pack('<IQ', _GGUF_UINT64, value)
    if isinstance(value, float):
        return struct.pack('<If', _GGUF_FLOAT32, value)
    if isinstance(value, str):
        return struct.pack('<I', _GGUF_STRING) + _pack_gguf_string(value)
    if isinstance(value, (list, tuple)):
        return struct.pack('<I', _GGUF_ARRAY) + _encode_gguf_array(value)
    raise TypeError(
        f"unsupported GGUF metadata type: {type(value).__name__}"
    )


def write_metadata_value(f, key, value):
    """Write one GGUF metadata key-value pair to *f*.

    The record is the key as a GGUF string, a u32 type tag, and the
    payload. Supported values: bool, int (int32/uint32/int64/uint64 chosen
    by range), float (stored as float32), str, and lists of strings or of
    numbers.

    The whole record is encoded in memory before anything is written, so
    an unsupported value raises without leaving a half-written entry in
    the output stream.

    Args:
        f: Binary file-like object with a ``write`` method.
        key: Metadata key (str).
        value: Metadata value.

    Raises:
        TypeError: If *value* (or a list element) has no GGUF encoding.
        struct.error: If an integer does not fit in 64 bits.
    """
    record = _pack_gguf_string(key) + _encode_gguf_value(value)
    f.write(record)
_DEFAULT_ALIGNMENT = 32  # used when the metadata has no general.alignment
_COPY_CHUNK_SIZE = 4 * 1024 * 1024  # bytes copied per read when streaming


def _padding_for(size, alignment):
    """Return the zero-byte count that rounds *size* up to *alignment*."""
    return (alignment - size % alignment) % alignment


def _is_writable_metadata(value):
    """Return True when *value* is a scalar or a homogeneous list.

    Lists must hold only strings or only numbers; anything else (None,
    dicts, nested lists) cannot be serialized as a GGUF array.
    """
    if isinstance(value, (str, int, float, bool)):
        return True
    if isinstance(value, list):
        return (all(isinstance(v, str) for v in value)
                or all(isinstance(v, (int, float)) for v in value))
    return False


def _read_exact(stream, size, filepath):
    """Read exactly *size* bytes from *stream* or raise ValueError."""
    chunk = stream.read(size)
    if len(chunk) != size:
        raise ValueError(f"Truncated organ header in {filepath}")
    return chunk


def _read_organ_header(filepath):
    """Parse the header of one organ file without loading its payload.

    Layout read here: u32 name length, name bytes, u32 dimension count,
    one u64 per dimension, u32 dtype, then raw tensor data to end of file.

    Returns a dict with 'path', 'name', 'dims', 'dtype', 'data_offset'
    (where the raw data starts) and 'data_size' (bytes of raw data).
    """
    with open(filepath, 'rb') as organ_f:
        name_len = struct.unpack('<I', _read_exact(organ_f, 4, filepath))[0]
        name = _read_exact(organ_f, name_len, filepath).decode(
            'utf-8', errors='replace')
        n_dims = struct.unpack('<I', _read_exact(organ_f, 4, filepath))[0]
        dims = list(struct.unpack(
            f'<{n_dims}Q', _read_exact(organ_f, 8 * n_dims, filepath)))
        dtype = struct.unpack('<I', _read_exact(organ_f, 4, filepath))[0]
        data_offset = organ_f.tell()
        organ_f.seek(0, os.SEEK_END)
        data_size = organ_f.tell() - data_offset
    return {
        'path': filepath,
        'name': name,
        'dims': dims,
        'dtype': dtype,
        'data_offset': data_offset,
        'data_size': data_size,
    }


def _copy_tensor_data(tensor, out):
    """Stream the raw data of *tensor* (a header dict) into *out*."""
    remaining = tensor['data_size']
    with open(tensor['path'], 'rb') as organ_f:
        organ_f.seek(tensor['data_offset'])
        while remaining > 0:
            chunk = organ_f.read(min(_COPY_CHUNK_SIZE, remaining))
            if not chunk:
                # The tensor-info offsets already written assume data_size.
                raise ValueError(
                    f"Organ file changed while assembling: {tensor['path']}")
            out.write(chunk)
            remaining -= len(chunk)


def assemble_gguf(organ_dir, output_path, verbose=False):
    """
    Assemble a GGUF file from extracted/grafted organs.

    Reads manifest.json for structure, then writes a GGUF v3 file made of
    the header, the manifest metadata, one tensor-info record per organ,
    and the organ payloads. Payloads are streamed from the organ files in
    chunks, so memory use does not grow with model size.

    Args:
        organ_dir: Directory holding manifest.json and the organ files.
        output_path: Path of the GGUF file to create (overwritten).
        verbose: When true, print one line per tensor written.

    Returns:
        output_path, unchanged.

    Raises:
        SystemExit: If manifest.json is missing (exit status 1).
        ValueError: If an organ file header is truncated.
    """
    organ_dir = Path(organ_dir)
    manifest_path = organ_dir / 'manifest.json'
    if not manifest_path.exists():
        print(f"[ERROR] No manifest.json in {organ_dir}")
        sys.exit(1)

    # JSON is UTF-8; do not depend on the platform default encoding.
    with open(manifest_path, encoding='utf-8') as f:
        manifest = json.load(f)

    model_name = manifest['model']
    print(f"[ASSEMBLE] Model: {model_name}")
    print(f"[ASSEMBLE] Architecture: {manifest['architecture']}")

    # Collect all organs whose file is present; missing ones are skipped.
    organs = []
    for entry in manifest['organs'].values():
        filepath = organ_dir / entry['file']
        if not filepath.exists():
            print(f"[WARNING] Missing organ: {entry['file']}")
            continue
        organs.append((entry, filepath))

    # Sort by layer (negative layers collapse to -1), then type priority,
    # then name.
    type_priority = {'embed': 0, 'norm': 1, 'skeleton': 2, 'organ': 3, 'organ_expert': 4, 'adapter': 5, 'unknown': 6}
    organs.sort(key=lambda o: (
        o[0]['layer'] if o[0]['layer'] >= 0 else -1,
        type_priority.get(o[0]['type'], 99),
        o[0]['name']
    ))
    print(f"[ASSEMBLE] Organs: {len(organs)}")

    # Keep only metadata that can be serialized in full, so the key-value
    # count in the header always matches the records actually written.
    clean_metadata = {}
    for key, value in manifest.get('metadata', {}).items():
        if key.startswith('_'):
            continue
        if not _is_writable_metadata(value):
            print(f"[WARNING] Skipping metadata with unsupported type: {key}")
            continue
        clean_metadata[key] = value

    # Honour the alignment the file itself declares; fall back to 32.
    alignment = clean_metadata.get('general.alignment', _DEFAULT_ALIGNMENT)
    if (isinstance(alignment, bool) or not isinstance(alignment, int)
            or alignment <= 0):
        alignment = _DEFAULT_ALIGNMENT

    # Parse every organ header before touching the output file, so a
    # corrupt organ does not leave a partial GGUF behind.
    tensors = [_read_organ_header(filepath) for _, filepath in organs]
    n_tensors = len(tensors)
    n_metadata = len(clean_metadata)

    with open(output_path, 'wb') as f:
        # ═══ HEADER ═══
        f.write(struct.pack('<I', GGUF_MAGIC))
        f.write(struct.pack('<I', 3))  # GGUF version 3
        f.write(struct.pack('<Q', n_tensors))
        f.write(struct.pack('<Q', n_metadata))

        # ═══ METADATA ═══
        for key, value in clean_metadata.items():
            write_metadata_value(f, key, value)

        # ═══ TENSOR INFO ═══
        # Offsets are relative to the start of the data section; each
        # tensor starts on an aligned boundary.
        current_offset = 0
        for td in tensors:
            write_string(f, td['name'])
            f.write(struct.pack('<I', len(td['dims'])))
            for d in td['dims']:
                f.write(struct.pack('<Q', d))
            f.write(struct.pack('<I', td['dtype']))
            f.write(struct.pack('<Q', current_offset))
            current_offset += td['data_size']
            current_offset += _padding_for(td['data_size'], alignment)

        # ═══ TENSOR DATA ═══
        # Align the start of the data section.
        f.write(b'\x00' * _padding_for(f.tell(), alignment))

        last_index = n_tensors - 1
        for i, td in enumerate(tensors):
            _copy_tensor_data(td, f)
            # Pad between tensors; the final tensor is left unpadded.
            # NOTE(review): some writers also pad after the last tensor;
            # kept as-is to preserve existing output - confirm with loaders.
            if i < last_index:
                f.write(b'\x00' * _padding_for(td['data_size'], alignment))
            if verbose:
                print(f" [{i+1}/{n_tensors}] {td['name'][:50]} ({td['data_size']} bytes)")

    output_size = os.path.getsize(output_path)
    output_mb = output_size / (1024 * 1024)
    output_gb = output_size / (1024 * 1024 * 1024)
    print(f"\n{'='*60}")
    print(" ASSEMBLY COMPLETE")
    print(f"{'='*60}")
    print(f" Model: {model_name}")
    print(f" Tensors: {n_tensors}")
    print(f" Size: {output_gb:.2f} GB ({output_mb:.0f} MB)")
    print(f" Output: {output_path}")
    print(f"{'='*60}")
    return output_path
def main():
    """Command-line entry point.

    Parses --dir/-d, --output/-o and --verbose/-v, checks that the organs
    directory exists (exiting with status 1 otherwise) and hands the
    arguments to assemble_gguf.
    """
    option_specs = (
        (('--dir', '-d'),
         {'required': True, 'help': 'Organs directory (with manifest.json)'}),
        (('--output', '-o'),
         {'required': True, 'help': 'Output GGUF file path'}),
        (('--verbose', '-v'),
         {'action': 'store_true'}),
    )
    cli = argparse.ArgumentParser(epilog='CSCI toolkit')
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    if os.path.isdir(opts.dir):
        assemble_gguf(opts.dir, opts.output, opts.verbose)
        return

    print(f"[ERROR] Directory not found: {opts.dir}")
    sys.exit(1)


if __name__ == '__main__':
    main()
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
# Licensed under Business Source License 1.1 — https://inference-x.com
# ─────────────────────────────────────────────────────────
# SHA256: 56ce59cd04118749c0c40c8bdb6d566a59c8902e233709a013dca9a38658cc44
# SIG-ED25519: tDk5EuOHITlQbZHbZ/HbOz8+111fot0dk4iQMDEWKjsq5gsKyGNbvAwTGl0hfkD0gUdhG0nPxczaCswlct7PCA==
# VERIFY: python3 verify_authorship.py organ_assemble.py