#!/usr/bin/env python3
"""
MODEL 935 — Proper GGUF assembler
Reads source GGUF header intact, replaces tensor data from organ bins
(stripping the organ header that organ_extract added)

Z = dI/d(log s) · exp(iθ) — Signature 935
"""
import struct, os, sys, json
|
|
|
|
# Magic bytes that open every GGUF file.
_GGUF_MAGIC = b"GGUF"
# Alignment of the tensor-data section when `general.alignment` is absent.
_GGUF_DEFAULT_ALIGNMENT = 32
# Byte widths of the fixed-size GGUF metadata value types, keyed by type id
# (u8, i8, u16, i16, u32, i32, f32, bool, u64, i64, f64).
_GGUF_SCALAR_SIZES = {0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1,
                      10: 8, 11: 8, 12: 8}
_GGUF_TYPE_UINT32 = 4
_GGUF_TYPE_STRING = 8
_GGUF_TYPE_ARRAY = 9
# Sub-directories of the organs directory that are scanned for `.bin` files.
_ORGAN_CATEGORIES = ("skeleton", "organs", "embed", "norm", "adapters", "unknown")
# Upper bound on how much tensor data is held in memory while copying.
_COPY_CHUNK_SIZE = 1 << 24


def _read_u32(f):
    """Read one little-endian unsigned 32-bit integer from *f*."""
    return struct.unpack("<I", f.read(4))[0]


def _read_u64(f):
    """Read one little-endian unsigned 64-bit integer from *f*."""
    return struct.unpack("<Q", f.read(8))[0]


def _read_string(f):
    """Read a GGUF string (u64 byte length followed by UTF-8 bytes)."""
    length = _read_u64(f)
    return f.read(length).decode("utf-8")


def _skip_value(f, vtype):
    """Advance *f* past one metadata value of GGUF type id *vtype*.

    Raises:
        ValueError: if *vtype* is not a known type id. Continuing would leave
            the parser at an unknown position and misread everything after.
    """
    size = _GGUF_SCALAR_SIZES.get(vtype)
    if size is not None:
        f.seek(size, os.SEEK_CUR)
    elif vtype == _GGUF_TYPE_STRING:
        f.seek(_read_u64(f), os.SEEK_CUR)
    elif vtype == _GGUF_TYPE_ARRAY:
        elem_type = _read_u32(f)
        count = _read_u64(f)
        elem_size = _GGUF_SCALAR_SIZES.get(elem_type)
        if elem_size is not None:
            # Fixed-width elements: jump over the whole array in one seek.
            f.seek(elem_size * count, os.SEEK_CUR)
        else:
            for _ in range(count):
                _skip_value(f, elem_type)
    else:
        raise ValueError(f"Unknown GGUF metadata value type: {vtype}")


def _collect_organ_files(organs_dir):
    """Map organ file stem -> path for every `.bin` under the known categories.

    Categories are scanned in a fixed order; a stem that appears in several
    categories resolves to the last one scanned.
    """
    organ_map = {}
    for category in _ORGAN_CATEGORIES:
        cat_dir = os.path.join(organs_dir, category)
        if not os.path.isdir(cat_dir):
            continue
        for fname in os.listdir(cat_dir):
            if fname.endswith(".bin"):
                organ_map[fname[:-4]] = os.path.join(cat_dir, fname)
    return organ_map


def _read_organ_raw_data(filepath):
    """Read an organ bin, skip its header, and return only the raw tensor data.

    Organ header layout: name_len(4) + name + ndims(4) + dims(8*ndims) + dtype(4).
    """
    with open(filepath, "rb") as of:
        name_len = _read_u32(of)
        of.seek(name_len, os.SEEK_CUR)      # name
        n_dims = _read_u32(of)
        of.seek(8 * n_dims + 4, os.SEEK_CUR)  # dims + dtype
        return of.read()


def _copy_bytes(src, dst, count):
    """Copy up to *count* bytes from *src* to *dst* in bounded chunks."""
    remaining = count
    while remaining > 0:
        chunk = src.read(min(_COPY_CHUNK_SIZE, remaining))
        if not chunk:
            break  # source ended early; write what was available
        dst.write(chunk)
        remaining -= len(chunk)


def build_model_935(source_gguf, organs_dir, output_gguf):
    """Assemble a GGUF file from a source GGUF plus replacement organ bins.

    The source header (magic, metadata, tensor info table and alignment
    padding) is copied verbatim. For each tensor, the payload of the organ
    file named after the tensor (dots replaced by underscores, `.bin` suffix)
    is written when its length equals the space the tensor occupies in the
    source; otherwise the bytes are copied from the source file.

    Args:
        source_gguf: path of the GGUF file providing header and fallback data.
        organs_dir: directory holding the category sub-directories of organ bins.
        output_gguf: path of the GGUF file to create (overwritten if present).

    Raises:
        ValueError: if source and output are the same file, the source does
            not start with the GGUF magic, the metadata contains an unknown
            value type, `general.alignment` is zero, or a tensor offset lies
            beyond the end of the source file.
    """
    # Opening the output in "wb" mode would truncate the source before reading.
    if os.path.realpath(source_gguf) == os.path.realpath(output_gguf):
        raise ValueError("output_gguf must not be the same file as source_gguf")

    with open(source_gguf, "rb") as f:
        # Read GGUF header
        if f.read(4) != _GGUF_MAGIC:
            raise ValueError(f"{source_gguf} is not a GGUF file (bad magic)")
        version = _read_u32(f)
        n_tensors = _read_u64(f)
        n_metadata = _read_u64(f)

        print(f"Source: {os.path.basename(source_gguf)}")
        print(f" Version: {version}, Tensors: {n_tensors}, Metadata: {n_metadata}")

        # Skip metadata, keeping only the optional alignment override.
        alignment = _GGUF_DEFAULT_ALIGNMENT
        for _ in range(n_metadata):
            key = _read_string(f)
            vtype = _read_u32(f)
            if key == "general.alignment" and vtype == _GGUF_TYPE_UINT32:
                alignment = _read_u32(f)
            else:
                _skip_value(f, vtype)
        if alignment == 0:
            raise ValueError("general.alignment must be non-zero")

        # Read tensor info
        tensor_info = []
        for _ in range(n_tensors):
            name = _read_string(f)
            n_dims = _read_u32(f)
            dims = [_read_u64(f) for _ in range(n_dims)]
            dtype = _read_u32(f)
            offset = _read_u64(f)
            tensor_info.append(
                {"name": name, "dims": dims, "dtype": dtype, "offset": offset}
            )

        # Tensor data starts at the next aligned position after the info table.
        pos = f.tell()
        padding = (alignment - (pos % alignment)) % alignment
        f.read(padding)
        data_start = f.tell()
        print(f" Header: {data_start} bytes, Data start: {data_start}")

        f.seek(0, os.SEEK_END)
        file_end = f.tell()

        # Build organ map: name -> filepath
        organ_map = _collect_organ_files(organs_dir)
        print(f" Organ files: {len(organ_map)}")

        # Copy full header (everything up to data_start)
        f.seek(0)
        header = f.read(data_start)

        written_from_organ = 0
        written_from_source = 0

        # A tensor's extent runs to the next-higher offset, so process tensors
        # in offset order rather than trusting the order of the info table.
        ordered = sorted(tensor_info, key=lambda ti: ti["offset"])

        with open(output_gguf, "wb") as out:
            out.write(header)

            for i, ti in enumerate(ordered):
                start = data_start + ti["offset"]
                if i + 1 < len(ordered):
                    end = data_start + ordered[i + 1]["offset"]
                else:
                    end = file_end
                tensor_size = end - start
                if tensor_size < 0:
                    raise ValueError(
                        f"Tensor {ti['name']!r} starts beyond the end of the source file"
                    )

                safe_name = ti["name"].replace(".", "_")
                organ_path = organ_map.get(safe_name)

                # NOTE(review): tensor_size includes any alignment padding that
                # follows the tensor; an organ payload stored without that
                # padding will not match and the source bytes are used instead.
                raw_data = None
                if organ_path and os.path.exists(organ_path):
                    candidate = _read_organ_raw_data(organ_path)
                    if len(candidate) == tensor_size:
                        raw_data = candidate

                # Write at the offset the header promises for this tensor.
                out.seek(start)
                if raw_data is not None:
                    out.write(raw_data)
                    written_from_organ += 1
                else:
                    # Not in organs, or size mismatch — use source
                    f.seek(start)
                    _copy_bytes(f, out, tensor_size)
                    written_from_source += 1

    final_size = os.path.getsize(output_gguf)
    source_size = os.path.getsize(source_gguf)

    print(f"\n Output: {output_gguf}")
    print(f" Size: {final_size / (1024**3):.2f} GB (source: {source_size / (1024**3):.2f} GB)")
    print(f" From organs: {written_from_organ} | From source: {written_from_source}")
    print(f" Size match: {'✓' if abs(final_size - source_size) < 1024 else '✗ MISMATCH'}")
    print(f" Signature: 935")
# Build 935 v3: R1-Distill base + Qwen FFN organs (correctly stripped).
# Runs at module level: prints a banner, then assembles the output model.
_banner_rule = "=" * 60
print(_banner_rule)
print(" MODEL 935 v3 — Correct Assembly")
print(_banner_rule)

build_model_935(
    source_gguf="/mnt/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf",
    organs_dir="/root/organ-architecture/organs/model-935-v2",
    output_gguf="/mnt/models/model-935-v3.gguf",
)