243 lines
8.4 KiB
Python
243 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Organ Architecture — organ_assemble.py
|
|
Assemble a GGUF model from extracted/grafted organs.
|
|
|
|
Takes a manifest + organ files → produces a working GGUF.
|
|
The reverse of organ_extract.py.
|
|
|
|
Signature 935
|
|
"""
|
|
|
|
import struct
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
|
|
GGUF_MAGIC = 0x46554747
|
|
|
|
|
|
def write_string(f, s):
    """Write a GGUF string to *f*: little-endian u64 byte length + UTF-8 bytes."""
    encoded = s.encode('utf-8')
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)


def write_metadata_value(f, key, value):
    """Write one GGUF metadata key-value pair to *f*.

    Supported value types: bool, int, float, str, and flat lists of
    str/int/float (the element type is taken from the first element,
    matching GGUF's requirement that arrays are homogeneous).

    Raises:
        ValueError: for unsupported value types — raised *before* any bytes
            are written. (Previously an unsupported type silently fell
            through after emitting the key, corrupting the output stream.)
    """
    # Validate up front so an unsupported value never leaves a half-written
    # key/type marker in the output.
    if not isinstance(value, (bool, int, float, str, list)):
        raise ValueError(f"unsupported metadata type for key {key!r}: {type(value).__name__}")
    if isinstance(value, list) and value and not isinstance(value[0], (str, int, float)):
        raise ValueError(f"unsupported array element type for key {key!r}: {type(value[0]).__name__}")

    write_string(f, key)

    # NOTE: bool must be tested before int — bool is a subclass of int.
    if isinstance(value, bool):
        f.write(struct.pack('<I', 7))       # GGUF type 7: bool
        f.write(struct.pack('<B', 1 if value else 0))
    elif isinstance(value, int):
        if value < 0:
            if value >= -0x80000000:
                f.write(struct.pack('<I', 5))   # GGUF type 5: int32
                f.write(struct.pack('<i', value))
            else:
                # Below INT32_MIN previously crashed struct.pack('<i', ...).
                f.write(struct.pack('<I', 11))  # GGUF type 11: int64
                f.write(struct.pack('<q', value))
        elif value <= 0xFFFFFFFF:
            f.write(struct.pack('<I', 4))   # GGUF type 4: uint32
            f.write(struct.pack('<I', value))
        else:
            f.write(struct.pack('<I', 10))  # GGUF type 10: uint64
            f.write(struct.pack('<Q', value))
    elif isinstance(value, float):
        f.write(struct.pack('<I', 6))       # GGUF type 6: float32
        f.write(struct.pack('<f', value))
    elif isinstance(value, str):
        f.write(struct.pack('<I', 8))       # GGUF type 8: string
        write_string(f, value)
    else:  # list — element type validated above
        f.write(struct.pack('<I', 9))       # GGUF type 9: array
        if not value:
            # Empty array: element type is arbitrary; keep uint32 as before.
            f.write(struct.pack('<I', 4))
            f.write(struct.pack('<Q', 0))
        elif isinstance(value[0], str):
            f.write(struct.pack('<I', 8))   # element type: string
            f.write(struct.pack('<Q', len(value)))
            for v in value:
                write_string(f, v)
        elif isinstance(value[0], int):
            f.write(struct.pack('<I', 5))   # element type: int32
            f.write(struct.pack('<Q', len(value)))
            for v in value:
                f.write(struct.pack('<i', v))
        else:
            f.write(struct.pack('<I', 6))   # element type: float32
            f.write(struct.pack('<Q', len(value)))
            for v in value:
                f.write(struct.pack('<f', v))
|
|
|
|
|
|
# Ordering priority for organs within the same layer; unknown types sort last.
_TYPE_PRIORITY = {'embed': 0, 'norm': 1, 'skeleton': 2, 'organ': 3, 'organ_expert': 4, 'adapter': 5, 'unknown': 6}


def _collect_organs(organ_dir, manifest):
    """Return (manifest entry, Path) pairs for every organ file that exists.

    Missing files are reported with a warning and skipped.  The result is
    sorted by layer number (negatives clamped to -1 so non-layer organs lead),
    then type priority, then tensor name — the original extraction order.
    """
    organs = []
    for entry in manifest['organs'].values():
        filepath = Path(organ_dir) / entry['file']
        if not filepath.exists():
            print(f"[WARNING] Missing organ: {entry['file']}")
            continue
        organs.append((entry, filepath))
    organs.sort(key=lambda o: (
        o[0]['layer'] if o[0]['layer'] >= 0 else -1,
        _TYPE_PRIORITY.get(o[0]['type'], 99),
        o[0]['name'],
    ))
    return organs


def _clean_metadata(manifest):
    """Manifest metadata filtered to keys not starting with '_' and values of
    GGUF-serializable types (str/int/float/bool/list)."""
    return {
        k: v
        for k, v in manifest.get('metadata', {}).items()
        if not k.startswith('_') and isinstance(v, (str, int, float, bool, list))
    }


def _read_organ(filepath):
    """Parse one organ .bin file.

    Layout: u32 name length, UTF-8 name, u32 n_dims, n_dims x u64 dims,
    u32 dtype id, then the raw tensor bytes to EOF.
    """
    with open(filepath, 'rb') as organ_f:
        name_len = struct.unpack('<I', organ_f.read(4))[0]
        name = organ_f.read(name_len).decode('utf-8', errors='replace')
        n_dims = struct.unpack('<I', organ_f.read(4))[0]
        dims = [struct.unpack('<Q', organ_f.read(8))[0] for _ in range(n_dims)]
        dtype = struct.unpack('<I', organ_f.read(4))[0]
        data = organ_f.read()
    return {'name': name, 'dims': dims, 'dtype': dtype, 'data': data}


def assemble_gguf(organ_dir, output_path, verbose=False):
    """
    Assemble a GGUF file from extracted/grafted organs.

    Reads manifest.json in *organ_dir* for structure, then concatenates the
    organ .bin files back into a valid GGUF v3 file at *output_path*.

    Args:
        organ_dir: Directory containing manifest.json and organ .bin files.
        output_path: Path of the GGUF file to write.
        verbose: If True, print one line per tensor while writing data.

    Returns:
        output_path, unchanged, once the file has been written.

    Exits the process with status 1 if manifest.json is missing.
    """
    manifest_path = Path(organ_dir) / 'manifest.json'
    if not manifest_path.exists():
        print(f"[ERROR] No manifest.json in {organ_dir}")
        sys.exit(1)

    with open(manifest_path) as f:
        manifest = json.load(f)

    model_name = manifest['model']
    print(f"[ASSEMBLE] Model: {model_name}")
    print(f"[ASSEMBLE] Architecture: {manifest['architecture']}")

    organs = _collect_organs(organ_dir, manifest)
    print(f"[ASSEMBLE] Organs: {len(organs)}")

    clean_metadata = _clean_metadata(manifest)

    n_tensors = len(organs)
    n_metadata = len(clean_metadata)

    with open(output_path, 'wb') as f:
        # ═══ HEADER ═══
        f.write(struct.pack('<I', GGUF_MAGIC))
        f.write(struct.pack('<I', 3))  # GGUF version 3
        f.write(struct.pack('<Q', n_tensors))
        f.write(struct.pack('<Q', n_metadata))

        # ═══ METADATA ═══
        for key, value in clean_metadata.items():
            write_metadata_value(f, key, value)

        # ═══ TENSOR INFO ═══
        # Load every organ up front; offsets below need each tensor's size.
        tensor_data_list = [_read_organ(filepath) for _entry, filepath in organs]

        # Offsets are relative to the (32-byte aligned) start of the data
        # section; each tensor's data is then padded up to a 32-byte boundary,
        # so every offset stays 32-aligned.
        current_offset = 0
        for td in tensor_data_list:
            write_string(f, td['name'])
            f.write(struct.pack('<I', len(td['dims'])))
            for d in td['dims']:
                f.write(struct.pack('<Q', d))
            f.write(struct.pack('<I', td['dtype']))
            f.write(struct.pack('<Q', current_offset))
            current_offset += len(td['data'])
            current_offset += (32 - current_offset % 32) % 32  # align next tensor

        # ═══ TENSOR DATA ═══
        # Align the start of the data section itself to 32 bytes.
        f.write(b'\x00' * ((32 - f.tell() % 32) % 32))

        for i, td in enumerate(tensor_data_list):
            f.write(td['data'])

            # Pad to the next 32-byte boundary (no padding after the last
            # tensor), mirroring the offsets computed above.
            if i < len(tensor_data_list) - 1:
                f.write(b'\x00' * ((32 - len(td['data']) % 32) % 32))

            if verbose:
                print(f" [{i+1}/{len(tensor_data_list)}] {td['name'][:50]} ({len(td['data'])} bytes)")

    output_size = os.path.getsize(output_path)
    output_mb = output_size / (1024 * 1024)
    output_gb = output_size / (1024 * 1024 * 1024)

    print(f"\n{'='*60}")
    print(f" ASSEMBLY COMPLETE")
    print(f"{'='*60}")
    print(f" Model: {model_name}")
    print(f" Tensors: {n_tensors}")
    print(f" Size: {output_gb:.2f} GB ({output_mb:.0f} MB)")
    print(f" Output: {output_path}")
    print(f" Signature: 935")
    print(f"{'='*60}")

    return output_path
|
|
|
|
|
|
def main():
    """Command-line entry point: validate the organ directory, then assemble."""
    ap = argparse.ArgumentParser(
        description='Organ Architecture — Assemble GGUF from organs',
        epilog='Signature 935',
    )
    ap.add_argument('--dir', '-d', required=True, help='Organs directory (with manifest.json)')
    ap.add_argument('--output', '-o', required=True, help='Output GGUF file path')
    ap.add_argument('--verbose', '-v', action='store_true')
    opts = ap.parse_args()

    if os.path.isdir(opts.dir):
        assemble_gguf(opts.dir, opts.output, opts.verbose)
    else:
        print(f"[ERROR] Directory not found: {opts.dir}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
|
# ╔══ SALKA ELMADANI AUTHORSHIP CERTIFICATE ══╗
|
|
# © Salka Elmadani 2025-2026 — ALL RIGHTS RESERVED
|
|
# Licensed under Business Source License 1.1 — https://inference-x.com
|
|
# ─────────────────────────────────────────────────────────
|
|
# SHA256: 56ce59cd04118749c0c40c8bdb6d566a59c8902e233709a013dca9a38658cc44
|
|
# SIG-ED25519: tDk5EuOHITlQbZHbZ/HbOz8+111fot0dk4iQMDEWKjsq5gsKyGNbvAwTGl0hfkD0gUdhG0nPxczaCswlct7PCA==
|
|
# VERIFY: python3 verify_authorship.py organ_assemble.py
|