12104 lines
217 KiB
JSON
12104 lines
217 KiB
JSON
{
|
|
"model": "Kimi-K2.5-1T",
|
|
"quant": "Q4_0",
|
|
"total_shards": 13,
|
|
"shards_completed": 13,
|
|
"total_tensors": 1096,
|
|
"tensors": [
|
|
{
|
|
"name": "output.weight",
|
|
"dims": [
|
|
7168,
|
|
163840
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 1174405120,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "output_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "token_embd.weight",
|
|
"dims": [
|
|
7168,
|
|
163840
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 1174405120,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 86.27,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 81.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 85.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 55.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.ffn_down.weight",
|
|
"dims": [
|
|
18432,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 132120576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.0.ffn_gate.weight",
|
|
"dims": [
|
|
7168,
|
|
18432
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 132120576,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.0.ffn_up.weight",
|
|
"dims": [
|
|
7168,
|
|
18432
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 132120576,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 81.09,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 86.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 78.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.1.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 52.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 82.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 53.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.06,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.36,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.2.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 74.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.06,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 69.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.3.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 61.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.27,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 68.28,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.4.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.25,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 49.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 89.4,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 87.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.5.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.27,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 45.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 86.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.28,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 59.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 5637144576,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q4_1",
|
|
"n_elements": 14680064,
|
|
"theta": null,
|
|
"status": "unsupported_dtype_3"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.6.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 40.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 64.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.12,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.7.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 77.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 87.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 84.23,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 86.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.8.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 65.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 89.4,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.9.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 59.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 65.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.10.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 62.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.05,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 61.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.11.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.16,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.25,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 81.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.12.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 75.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 88.32,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.13.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.24,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.17,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.14.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 67.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 84.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.12,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.07,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.15.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 81.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.28,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.25,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.16.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 61.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.17,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 82.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.01,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.17.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.24,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 84.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.09,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.18.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 52.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 67.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 82.14,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.4,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.19.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 71.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 73.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.20.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 80.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.32,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 71.13,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.28,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.21.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 68.11,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.29,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.22.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.05,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 76.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.23,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 85.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 83.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.23.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 88.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.23,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 84.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.24.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 54.24,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 73.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.25.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 83.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 86.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 71.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.4,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.26.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 82.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 77.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.11,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.13,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.27.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 81.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.16,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 77.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.03,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.28.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 62.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 82.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.29.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 80.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 82.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 80.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.30.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.25,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.31.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.27,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 72.12,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.29,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.17,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 88.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.46,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.32.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 69.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 85.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 55.46,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.33.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 71.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 74.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.34.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.08,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.07,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.46,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.35.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.11,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 73.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 87.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.13,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 84.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.32,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.36.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.18,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 83.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 63.16,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.37.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 66.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 87.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.38.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.24,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.21,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 86.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.39.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 84.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 82.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.1,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 83.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.40.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 72.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.41.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 73.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.56,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 81.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.42.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 50.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 75.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.43.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 64.17,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 83.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.44.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 61.22,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.08,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.67,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 88.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.43,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.29,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.45.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 65.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.06,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 65.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 84.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.46,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.46.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 69.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.19,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.47.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 64.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 87.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 72.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 84.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.48.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 87.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 71.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 79.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.26,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.58,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.49.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 78.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.48,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 82.52,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.50.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.49,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.29,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.96,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.48,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 84.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 85.53,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.93,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.57,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.51.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.61,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 76.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 84.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 87.09,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.85,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.44,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.52.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.5,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 77.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 82.04,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.08,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.32,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.77,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.27,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 88.39,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.62,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.53.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.32,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 66.17,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.15,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.86,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 66.4,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 86.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.59,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.35,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.34,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.54.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 62.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 87.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.54,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.42,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.09,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.75,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.55.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 62.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.46,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 88.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 88.84,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 77.89,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.78,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 87.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.66,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.56.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 70.3,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 88.37,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.95,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 89.14,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.7,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 72.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.06,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.94,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.72,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.83,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.57.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.36,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 69.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.82,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 89.33,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.38,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.87,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 72.1,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 87.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.76,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 90.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.65,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.68,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.58.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.47,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 75.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 86.41,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 86.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.2,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.8,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.9,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.74,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.98,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 82.73,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 88.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.97,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.45,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.59.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.0,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_k_b.weight",
|
|
"dims": [
|
|
128,
|
|
512,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 85.79,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_kv_a_mqa.weight",
|
|
"dims": [
|
|
7168,
|
|
576
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4128768,
|
|
"theta": 89.6,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_kv_a_norm.weight",
|
|
"dims": [
|
|
512
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 512,
|
|
"theta": 88.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.88,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_output.weight",
|
|
"dims": [
|
|
8192,
|
|
7168
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 58720256,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_q_a.weight",
|
|
"dims": [
|
|
7168,
|
|
1536
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 11010048,
|
|
"theta": 89.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_q_a_norm.weight",
|
|
"dims": [
|
|
1536
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 1536,
|
|
"theta": 87.71,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_q_b.weight",
|
|
"dims": [
|
|
1536,
|
|
12288
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 18874368,
|
|
"theta": 89.55,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.attn_v_b.weight",
|
|
"dims": [
|
|
512,
|
|
128,
|
|
64
|
|
],
|
|
"dtype": "Q8_0",
|
|
"n_elements": 4194304,
|
|
"theta": 64.16,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.exp_probs_b.bias",
|
|
"dims": [
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 384,
|
|
"theta": 89.69,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_down_exps.weight",
|
|
"dims": [
|
|
2048,
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.81,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_down_shexp.weight",
|
|
"dims": [
|
|
2048,
|
|
7168
|
|
],
|
|
"dtype": "Q6_K",
|
|
"n_elements": 14680064,
|
|
"theta": 89.51,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_gate_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.64,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_gate_inp.weight",
|
|
"dims": [
|
|
7168,
|
|
384
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 2752512,
|
|
"theta": 89.99,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_gate_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 87.63,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_norm.weight",
|
|
"dims": [
|
|
7168
|
|
],
|
|
"dtype": "F32",
|
|
"n_elements": 7168,
|
|
"theta": 89.92,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_up_exps.weight",
|
|
"dims": [
|
|
7168,
|
|
2048,
|
|
384
|
|
],
|
|
"dtype": "Q4_0",
|
|
"n_elements": 5637144576,
|
|
"theta": 89.91,
|
|
"status": "ok"
|
|
},
|
|
{
|
|
"name": "blk.60.ffn_up_shexp.weight",
|
|
"dims": [
|
|
7168,
|
|
2048
|
|
],
|
|
"dtype": "Q5_K",
|
|
"n_elements": 14680064,
|
|
"theta": 88.65,
|
|
"status": "ok"
|
|
}
|
|
],
|
|
"summary": {
|
|
"total_time_hours": 1.69,
|
|
"overall_theta": 87.65,
|
|
"overall_std": 6.02,
|
|
"groups": {
|
|
"output": {
|
|
"theta": 89.97,
|
|
"std": 0.0,
|
|
"count": 1
|
|
},
|
|
"norm": {
|
|
"theta": 89.94,
|
|
"std": 0.0,
|
|
"count": 1
|
|
},
|
|
"embed": {
|
|
"theta": 89.45,
|
|
"std": 0.0,
|
|
"count": 1
|
|
},
|
|
"attention": {
|
|
"theta": 85.92,
|
|
"std": 8.04,
|
|
"count": 549
|
|
},
|
|
"ffn": {
|
|
"theta": 89.82,
|
|
"std": 0.23,
|
|
"count": 123
|
|
},
|
|
"other": {
|
|
"theta": 87.52,
|
|
"std": 1.59,
|
|
"count": 60
|
|
},
|
|
"moe_experts": {
|
|
"theta": 89.61,
|
|
"std": 0.41,
|
|
"count": 348
|
|
}
|
|
}
|
|
}
|
|
} |