From bc4b4ba756402605f8e718077e72147a4202d95f Mon Sep 17 00:00:00 2001 From: qinyiqun Date: Wed, 17 Jul 2024 15:25:20 +0800 Subject: [PATCH 1/3] add split --- ggus/src/header.rs | 2 +- ggus/src/metadata/mod.rs | 8 + ggus/src/tensor.rs | 8 + ggus/src/writer.rs | 6 +- xtask/src/merge.rs | 10 +- xtask/src/split.rs | 354 ++++++++++++++++++++++++++++++++++++++- 6 files changed, 378 insertions(+), 10 deletions(-) diff --git a/ggus/src/header.rs b/ggus/src/header.rs index 27282aa..d40fa14 100644 --- a/ggus/src/header.rs +++ b/ggus/src/header.rs @@ -1,7 +1,7 @@ use crate::sizeof; use std::str::Utf8Error; -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] #[repr(C)] pub struct GGufFileHeader { magic: [u8; 4], diff --git a/ggus/src/metadata/mod.rs b/ggus/src/metadata/mod.rs index 25931d8..b8502fc 100644 --- a/ggus/src/metadata/mod.rs +++ b/ggus/src/metadata/mod.rs @@ -82,6 +82,14 @@ pub struct GGufMetaKVPairs<'a> { } impl<'a> GGufMetaKVPairs<'a> { + pub fn new(nbytes_: usize) -> Self { + let indices_ = IndexMap::new(); + Self { + indices: indices_, + nbytes: nbytes_, + } + } + pub fn scan(count: u64, data: &'a [u8]) -> Result> { let mut reader = GGufReader::new(data); let mut indices = IndexMap::with_capacity(count as _); diff --git a/ggus/src/tensor.rs b/ggus/src/tensor.rs index 0041051..04dd4ad 100644 --- a/ggus/src/tensor.rs +++ b/ggus/src/tensor.rs @@ -93,6 +93,14 @@ pub struct GGufTensors<'a> { } impl<'a> GGufTensors<'a> { + pub fn new(nbytes_: usize) -> Self { + let indices_: IndexMap<&str, ()> = IndexMap::new(); + Self { + indices: indices_, + nbytes: nbytes_, + } + } + pub fn scan(count: u64, data: &'a [u8]) -> Result> { let mut reader = GGufReader::new(data); let mut indices = IndexMap::with_capacity(count as _); diff --git a/ggus/src/writer.rs b/ggus/src/writer.rs index d44f1db..54edb11 100644 --- a/ggus/src/writer.rs +++ b/ggus/src/writer.rs @@ -10,9 +10,9 @@ pub struct GGufWriter(BufWriter, usize); impl GGufWriter { #[inline] pub fn new(writer: T, header: GGufFileHeader) -> Result { - let mut buf = BufWriter::new(writer); - buf.write_all(as_slice(&header))?; - Ok(Self(buf, 0)) + let mut ans = Self(BufWriter::new(writer), 0); + ans.write_bytes(as_slice(&header))?; + Ok(ans) } #[inline] diff --git a/xtask/src/merge.rs b/xtask/src/merge.rs index d30f88c..f982c36 100644 --- a/xtask/src/merge.rs +++ b/xtask/src/merge.rs @@ -57,7 +57,9 @@ impl MergeArgs { let out = File::create(shards.single_file()).unwrap(); let header = GGufFileHeader::new(3, tensors.len() as _, (kvs.len() + 1) as _); - let mut writer: GGufWriter = GGufWriter::new(out, header).unwrap(); + // let mut writer: GGufWriter = GGufWriter::new(out).unwrap(); + // writer.write_head(header).unwrap(); + let mut writer = GGufWriter::new(out, header).unwrap(); let align = files .iter() @@ -68,7 +70,7 @@ impl MergeArgs { writer .write_meta_kv( "general.alignment", - GGufMetaDataValueType::U64, + GGufMetaDataValueType::U32, (align as u64).to_le_bytes(), ) .unwrap(); @@ -135,9 +137,11 @@ impl<'a> GGufFile<'a> { if !header.is_magic_correct() { return Err(GGufError::MagicMismatch); } + if !header.is_native_endian() { return Err(GGufError::EndianNotSupport); } + if header.version != 3 { return Err(GGufError::VersionNotSupport); } @@ -147,11 +151,13 @@ impl<'a> GGufFile<'a> { .map_err(GGufError::Reading)?; let cursor = cursor + meta_kvs.nbytes(); + let tensors = GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?; let align = meta_kvs.alignment(); let cursor = (cursor + tensors.nbytes() + align - 1) / align * align; + Ok(Self { meta_kvs, tensors, diff --git a/xtask/src/split.rs b/xtask/src/split.rs index fb269be..add883d 100644 --- a/xtask/src/split.rs +++ b/xtask/src/split.rs @@ -1,13 +1,359 @@ -use std::path::PathBuf; +use ggus::{ + GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufReadError, GGufTensors, GGufWriter, +}; +use indexmap::IndexMap; +use std::{fs::File, path::PathBuf}; + +const GGUF_VERSION: u32 = 3; +const GGUF_DEFAULT_ALIGNMENT: usize = 32; +const LLM_KV_SPLIT_NO: &str = "split.no"; +const LLM_KV_SPLIT_COUNT: &str = "split.count"; +const LLM_KV_SPLIT_TENSORS_COUNT: &str = "split.tensors.count"; #[derive(Args, Default)] pub struct SplitArgs { - #[clap(long, short)] - file: PathBuf, + #[clap(long)] + input: PathBuf, + #[clap(long)] + output: Option, + // default 128 tensors + #[clap(long)] + split_max_tensors: Option, + #[clap(long)] + split_max_size: Option, + #[clap(long)] + no_tensor_first_split: bool, + n_bytes_split: u64, + n_split_tensors: u64, +} + +#[derive(Clone)] +struct GGufFile<'a> { + header: GGufFileHeader, + meta_kvs: GGufMetaKVPairs<'a>, + tensors: GGufTensors<'a>, + data: &'a [u8], +} + +#[derive(Debug)] +enum GGufError<'a> { + MagicMismatch, + EndianNotSupport, + VersionNotSupport, + #[allow(dead_code)] + Reading(GGufReadError<'a>), + FileSizeError, + SplitModeRepeated, +} + +impl<'a> GGufFile<'a> { + fn new(data: &'a [u8]) -> Result> { + let header = unsafe { data.as_ptr().cast::().read() }; + if !header.is_magic_correct() { + return Err(GGufError::MagicMismatch); + } + if !header.is_native_endian() { + return Err(GGufError::EndianNotSupport); + } + if header.version != 3 { + return Err(GGufError::VersionNotSupport); + } + + let cursor = header.nbytes(); + let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..]) + .map_err(GGufError::Reading)?; + + let cursor = cursor + meta_kvs.nbytes(); + let tensors = + GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?; + + let align = meta_kvs.alignment(); + let cursor = (cursor + tensors.nbytes() + align - 1) / align * align; + Ok(Self { + header, + meta_kvs, + tensors, + data: &data[cursor..], + }) + } +} + +#[derive(Clone)] +struct GGufFileInfo<'a> { + output_path: String, + header: GGufFileHeader, + meta_kvs: GGufMetaKVPairs<'a>, + new_kv_tuples: Vec<(String, GGufMetaDataValueType, u64)>, +} + +impl<'a> GGufFileInfo<'a> { + fn new_empty() -> Self { + let header = GGufFileHeader::new(GGUF_VERSION, 0, 0); + let meta_kvs = GGufMetaKVPairs::new(0); + let new_kv_tuples: Vec<(String, GGufMetaDataValueType, u64)> = Vec::new(); + let output_path = "".to_string(); + Self { + output_path, + header, + meta_kvs, + new_kv_tuples, + } + } } impl SplitArgs { pub fn split(self) { - todo!() + let file = File::open(&self.input) + .map_err(|e| { + println!("Failed to open"); + eprintln!(" file: {}", self.input.display()); + eprintln!(" cause: {e}"); + }) + .unwrap(); + let mmap = unsafe { memmap2::Mmap::map(&file).unwrap() }; + let ctx_gguf: GGufFile = GGufFile::new(&mmap).unwrap(); + + let align = ctx_gguf.meta_kvs.alignment(); + let tensors = ctx_gguf + .tensors + .iter() + .map(move |t| (t, ctx_gguf.data)) + .collect::>(); + let ggufs = self.split_strategy(ctx_gguf.clone()).unwrap(); + let mut tensor_iter: indexmap::map::Iter = tensors.iter(); + + for gguf in ggufs { + let out = File::create(gguf.output_path).unwrap(); + + let header = gguf.header; + let tensor_count: u64 = header.tensor_count; + let mut writer = GGufWriter::new(out, header).unwrap(); + + let kvs = gguf.meta_kvs.kvs(); + for kv in kvs { + writer + .write_meta_kv(kv.key(), kv.ty(), kv.value_bytes()) + .unwrap(); + } + + for kv in gguf.new_kv_tuples { + match kv.1 { + GGufMetaDataValueType::U16 => writer + .write_meta_kv(kv.0, kv.1, (kv.2 as u16).to_le_bytes()) + .unwrap(), + GGufMetaDataValueType::I32 => writer + .write_meta_kv(kv.0, kv.1, (kv.2 as i32).to_le_bytes()) + .unwrap(), + _ => (), + } + } + + let mut cursor = 0; + let mut paddings = Vec::with_capacity(tensor_count as usize + 1); + paddings.push(0); + + let mut tensor_info_iter: indexmap::map::Iter = + tensor_iter.clone(); + + for _ in 0..tensor_count { + let (tensor_info, _) = tensor_info_iter.next().unwrap(); + + writer + .write_tensor_info( + tensor_info.name(), + tensor_info.shape(), + tensor_info.ggml_type(), + cursor, + ) + .unwrap(); + + cursor += tensor_info.nbytes(); + let padding = pad(cursor, align); + + cursor += padding; + paddings.push(padding); + } + + paddings.pop(); + if !paddings.is_empty() { + paddings[0] = pad(writer.written_bytes(), GGUF_DEFAULT_ALIGNMENT); + } + + for padding in paddings { + for _ in 0..padding { + writer.write(0u8).unwrap(); + } + + let (t, data) = tensor_iter.next().unwrap(); + writer + .write_bytes(&data[t.offset()..][..t.nbytes()]) + .unwrap(); + } + + let end_padding = pad(writer.written_bytes(), GGUF_DEFAULT_ALIGNMENT); + for _ in 0..end_padding { + writer.write(0u8).unwrap(); + } + } + } + + fn split_strategy(mut self, ctx_gguf: GGufFile) -> Result, GGufError> { + use GGufMetaDataValueType as ty; + match (self.split_max_size.clone(), self.split_max_tensors) { + (Some(_), Some(_)) => { + return Err(GGufError::SplitModeRepeated); + } + (Some(max_size), None) => { + fn parse_split_max_size(split_max_size: String) -> Option { + if split_max_size.is_empty() { + return None; + } + let symbol = split_max_size.chars().last().unwrap(); + let len = split_max_size.len(); + let size = split_max_size[..len - 1].parse::(); + match size { + Ok(num) => { + if symbol == 'M' { + Some(num * 1000 * 1000) + } else if symbol == 'G' { + Some(num * 1000 * 1000 * 1000) + } else { + None + } + } + Err(_) => None, + } + } + + match parse_split_max_size(max_size) { + Some(size) => { + self.n_bytes_split = size; + } + None => { + // 错误的格式 + return Err(GGufError::FileSizeError); + } + } + } + _ => match self.split_max_tensors { + Some(tensors) => { + self.n_bytes_split = 0; + self.n_split_tensors = tensors; + } + None => { + self.n_bytes_split = 0; + self.n_split_tensors = 128; + } + }, + } + + let tensors = ctx_gguf + .tensors + .iter() + .map(move |t| (t, ctx_gguf.data)) + .collect::>(); + + let mut ggufs: Vec = Vec::new(); + let n_tensors: u64 = ctx_gguf.header.tensor_count; + + let setup_gguf_file = |i_split: u64, n_tensors: u64| { + let mut gguf_file = GGufFileInfo::new_empty(); + gguf_file + .new_kv_tuples + .push((LLM_KV_SPLIT_NO.to_string(), ty::U16, i_split)); + gguf_file + .new_kv_tuples + .push((LLM_KV_SPLIT_COUNT.to_string(), ty::U16, 0)); + gguf_file.new_kv_tuples.push(( + LLM_KV_SPLIT_TENSORS_COUNT.to_string(), + ty::I32, + n_tensors, + )); + gguf_file.header.metadata_kv_count += 3; + gguf_file + }; + + let mut i_split: u64 = 0; + let mut gguf_file = setup_gguf_file(i_split, n_tensors); + gguf_file.meta_kvs = ctx_gguf.meta_kvs.clone(); + gguf_file.header.metadata_kv_count += ctx_gguf.header.metadata_kv_count; + + if self.no_tensor_first_split { + i_split += 1; + ggufs.push(gguf_file); + gguf_file = setup_gguf_file(i_split, n_tensors); + } + + let mut curr_tensors_size: u64 = 0; + let mut i_tensor = 0; + + for t in tensors.keys() { + i_tensor += 1; + let tensor_size = t.nbytes(); + let n_bytes = (pad(tensor_size, GGUF_DEFAULT_ALIGNMENT) + tensor_size) as u64; + let next_tensor_size = curr_tensors_size + n_bytes; + + if self.should_split(i_tensor, n_tensors, next_tensor_size) { + ggufs.push(gguf_file.clone()); + i_tensor = 0; + i_split += 1; + gguf_file = setup_gguf_file(i_split, n_tensors); + curr_tensors_size = n_bytes; + } else { + curr_tensors_size = next_tensor_size; + } + gguf_file.header.tensor_count += 1; + } + + ggufs.push(gguf_file); + + let tensor_count = ggufs.len() as u64; + let output_path = &self.output.unwrap(); + let mut index = 0; + while index < ggufs.len() { + ggufs[index].output_path = split_path(output_path, index + 1, ggufs.len()); + ggufs[index].new_kv_tuples[1] = (LLM_KV_SPLIT_COUNT.to_string(), ty::U16, tensor_count); + index += 1; + } + + Ok(ggufs) + } + + fn should_split(&self, i_tensor: u64, n_tensors: u64, next_size: u64) -> bool { + if self.n_bytes_split > 0 { + next_size > self.n_bytes_split + } else { + i_tensor > 0 && i_tensor < n_tensors && i_tensor % self.n_split_tensors == 0 + } + } +} + +#[inline(always)] +const fn pad(pos: usize, align: usize) -> usize { + (align - pos % align) % align +} + +fn split_path(path_prefix: &String, split_no: usize, split_count: usize) -> String { + format!("{}-{:05}-of-{:05}.gguf", path_prefix, split_no, split_count) +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn it_works() { + let input = PathBuf::from("/home/qinyiqun/gguf/xtask/src/h2o-danube3-500m-chat-F16.gguf"); + let output = Some("/home/qinyiqun/gguf/xtask/src/test/rust/rust".to_string()); + let split_args = SplitArgs { + input, + output, + split_max_tensors: None, + split_max_size: Some("300M".to_string()), + no_tensor_first_split: true, + n_bytes_split: 0, + n_split_tensors: 0, + }; + + split_args.split(); } } From c552caaaa2a06da483def24c29f01e42801dfef4 Mon Sep 17 00:00:00 2001 From: qinyiqun Date: Fri, 19 Jul 2024 13:25:15 +0800 Subject: [PATCH 2/3] fix a bug and update style --- xtask/src/gguf_file.rs | 73 ++++++++++++++++++++++++++++ xtask/src/main.rs | 1 + xtask/src/merge.rs | 72 +++------------------------ xtask/src/split.rs | 107 +++++++++++------------------------------ 4 files changed, 111 insertions(+), 142 deletions(-) create mode 100644 xtask/src/gguf_file.rs diff --git a/xtask/src/gguf_file.rs b/xtask/src/gguf_file.rs new file mode 100644 index 0000000..85eb130 --- /dev/null +++ b/xtask/src/gguf_file.rs @@ -0,0 +1,73 @@ +use ggus::{GGufFileHeader, GGufMetaKVPairs, GGufReadError, GGufTensors}; +use indexmap::IndexMap; + +#[derive(Clone)] +pub(crate) struct GGufFile<'a> { + header: GGufFileHeader, + meta_kvs: GGufMetaKVPairs<'a>, + tensors: GGufTensors<'a>, + data: &'a [u8], +} + +#[derive(Debug)] +pub(crate) enum GGufError<'a> { + MagicMismatch, + EndianNotSupport, + VersionNotSupport, + #[allow(dead_code)] + Reading(GGufReadError<'a>), + FileSizeError, + SplitModeRepeated, +} + +impl<'a> GGufFile<'a> { + pub(crate) fn new(data: &'a [u8]) -> Result> { + let header = unsafe { data.as_ptr().cast::().read() }; + if !header.is_magic_correct() { + return Err(GGufError::MagicMismatch); + } + if !header.is_native_endian() { + return Err(GGufError::EndianNotSupport); + } + if header.version != 3 { + return Err(GGufError::VersionNotSupport); + } + + let cursor = header.nbytes(); + let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..]) + .map_err(GGufError::Reading)?; + + let cursor = cursor + meta_kvs.nbytes(); + let tensors = + GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?; + + let align = meta_kvs.alignment(); + let cursor = (cursor + tensors.nbytes() + align - 1) / align * align; + Ok(Self { + header, + meta_kvs, + tensors, + data: &data[cursor..], + }) + } + + pub(crate) fn get_header(&self) -> &GGufFileHeader { + &self.header + } + + pub(crate) fn get_meta_kvs(&self) -> &GGufMetaKVPairs<'a> { + &self.meta_kvs + } + + pub(crate) fn get_tensors_as_indexmap(&self) -> IndexMap { + self.tensors + .iter() + .map(move |t| (t, self.data)) + .collect::>() + } +} + +#[inline(always)] +pub(crate) const fn pad(pos: usize, align: usize) -> usize { + (align - pos % align) % align +} diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 6abe059..ef6b743 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -1,3 +1,4 @@ +mod gguf_file; mod loose_shards; mod merge; mod show; diff --git a/xtask/src/merge.rs b/xtask/src/merge.rs index f982c36..a8b0e03 100644 --- a/xtask/src/merge.rs +++ b/xtask/src/merge.rs @@ -1,7 +1,8 @@ -use crate::loose_shards::LooseShards; -use ggus::{ - GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufReadError, GGufTensors, GGufWriter, +use crate::{ + gguf_file::{pad, GGufFile}, + loose_shards::LooseShards, }; +use ggus::{GGufFileHeader, GGufMetaDataValueType, GGufWriter}; use indexmap::{IndexMap, IndexSet}; use std::{fs::File, iter::zip, path::PathBuf, thread}; @@ -44,7 +45,7 @@ impl MergeArgs { let kvs = files .iter() - .flat_map(|file| file.meta_kvs.kvs()) + .flat_map(|file| file.get_meta_kvs().kvs()) .filter(|kv| { let key = kv.key(); !key.starts_with("split.") && key != "general.alignment" @@ -52,18 +53,16 @@ impl MergeArgs { .collect::>(); let tensors = files .iter() - .flat_map(|file| file.tensors.iter().map(move |t| (t, file.data))) + .flat_map(|file| file.get_tensors_as_indexmap()) .collect::>(); let out = File::create(shards.single_file()).unwrap(); let header = GGufFileHeader::new(3, tensors.len() as _, (kvs.len() + 1) as _); - // let mut writer: GGufWriter = GGufWriter::new(out).unwrap(); - // writer.write_head(header).unwrap(); let mut writer = GGufWriter::new(out, header).unwrap(); let align = files .iter() - .map(|file| file.meta_kvs.alignment()) + .map(|file| file.get_meta_kvs().alignment()) .max() .unwrap(); @@ -71,7 +70,7 @@ impl MergeArgs { .write_meta_kv( "general.alignment", GGufMetaDataValueType::U32, - (align as u64).to_le_bytes(), + (align as u32).to_le_bytes(), ) .unwrap(); @@ -110,58 +109,3 @@ impl MergeArgs { } } } - -#[inline(always)] -const fn pad(pos: usize, align: usize) -> usize { - (align - pos % align) % align -} - -struct GGufFile<'a> { - meta_kvs: GGufMetaKVPairs<'a>, - tensors: GGufTensors<'a>, - data: &'a [u8], -} - -#[derive(Debug)] -enum GGufError<'a> { - MagicMismatch, - EndianNotSupport, - VersionNotSupport, - #[allow(dead_code)] - Reading(GGufReadError<'a>), -} - -impl<'a> GGufFile<'a> { - fn new(data: &'a [u8]) -> Result> { - let header = unsafe { data.as_ptr().cast::().read() }; - if !header.is_magic_correct() { - return Err(GGufError::MagicMismatch); - } - - if !header.is_native_endian() { - return Err(GGufError::EndianNotSupport); - } - - if header.version != 3 { - return Err(GGufError::VersionNotSupport); - } - - let cursor = header.nbytes(); - let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..]) - .map_err(GGufError::Reading)?; - - let cursor = cursor + meta_kvs.nbytes(); - - let tensors = - GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?; - - let align = meta_kvs.alignment(); - let cursor = (cursor + tensors.nbytes() + align - 1) / align * align; - - Ok(Self { - meta_kvs, - tensors, - data: &data[cursor..], - }) - } -} diff --git a/xtask/src/split.rs b/xtask/src/split.rs index add883d..c01760b 100644 --- a/xtask/src/split.rs +++ b/xtask/src/split.rs @@ -1,7 +1,5 @@ -use ggus::{ - GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufReadError, GGufTensors, GGufWriter, -}; -use indexmap::IndexMap; +use crate::gguf_file::{pad, GGufError, GGufFile}; +use ggus::{GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufWriter}; use std::{fs::File, path::PathBuf}; const GGUF_VERSION: u32 = 3; @@ -27,57 +25,6 @@ pub struct SplitArgs { n_split_tensors: u64, } -#[derive(Clone)] -struct GGufFile<'a> { - header: GGufFileHeader, - meta_kvs: GGufMetaKVPairs<'a>, - tensors: GGufTensors<'a>, - data: &'a [u8], -} - -#[derive(Debug)] -enum GGufError<'a> { - MagicMismatch, - EndianNotSupport, - VersionNotSupport, - #[allow(dead_code)] - Reading(GGufReadError<'a>), - FileSizeError, - SplitModeRepeated, -} - -impl<'a> GGufFile<'a> { - fn new(data: &'a [u8]) -> Result> { - let header = unsafe { data.as_ptr().cast::().read() }; - if !header.is_magic_correct() { - return Err(GGufError::MagicMismatch); - } - if !header.is_native_endian() { - return Err(GGufError::EndianNotSupport); - } - if header.version != 3 { - return Err(GGufError::VersionNotSupport); - } - - let cursor = header.nbytes(); - let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..]) - .map_err(GGufError::Reading)?; - - let cursor = cursor + meta_kvs.nbytes(); - let tensors = - GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?; - - let align = meta_kvs.alignment(); - let cursor = (cursor + tensors.nbytes() + align - 1) / align * align; - Ok(Self { - header, - meta_kvs, - tensors, - data: &data[cursor..], - }) - } -} - #[derive(Clone)] struct GGufFileInfo<'a> { output_path: String, @@ -113,13 +60,10 @@ impl SplitArgs { let mmap = unsafe { memmap2::Mmap::map(&file).unwrap() }; let ctx_gguf: GGufFile = GGufFile::new(&mmap).unwrap(); - let align = ctx_gguf.meta_kvs.alignment(); - let tensors = ctx_gguf - .tensors - .iter() - .map(move |t| (t, ctx_gguf.data)) - .collect::>(); + let align = ctx_gguf.get_meta_kvs().alignment(); let ggufs = self.split_strategy(ctx_gguf.clone()).unwrap(); + + let tensors = ctx_gguf.get_tensors_as_indexmap(); let mut tensor_iter: indexmap::map::Iter = tensors.iter(); for gguf in ggufs { @@ -131,6 +75,12 @@ impl SplitArgs { let kvs = gguf.meta_kvs.kvs(); for kv in kvs { + if kv.key() == LLM_KV_SPLIT_TENSORS_COUNT + || kv.key() == LLM_KV_SPLIT_COUNT + || kv.key() == LLM_KV_SPLIT_NO + { + continue; + } writer .write_meta_kv(kv.key(), kv.ty(), kv.value_bytes()) .unwrap(); @@ -247,14 +197,10 @@ impl SplitArgs { }, } - let tensors = ctx_gguf - .tensors - .iter() - .map(move |t| (t, ctx_gguf.data)) - .collect::>(); + let tensors = ctx_gguf.get_tensors_as_indexmap(); let mut ggufs: Vec = Vec::new(); - let n_tensors: u64 = ctx_gguf.header.tensor_count; + let n_tensors: u64 = ctx_gguf.get_header().tensor_count; let setup_gguf_file = |i_split: u64, n_tensors: u64| { let mut gguf_file = GGufFileInfo::new_empty(); @@ -273,13 +219,23 @@ impl SplitArgs { gguf_file }; - let mut i_split: u64 = 0; + let mut i_split: u64 = 1; let mut gguf_file = setup_gguf_file(i_split, n_tensors); - gguf_file.meta_kvs = ctx_gguf.meta_kvs.clone(); - gguf_file.header.metadata_kv_count += ctx_gguf.header.metadata_kv_count; + gguf_file.meta_kvs = ctx_gguf.get_meta_kvs().clone(); + + if gguf_file.meta_kvs.get(LLM_KV_SPLIT_NO).is_some() { + gguf_file.header.metadata_kv_count -= 1; + } + if gguf_file.meta_kvs.get(LLM_KV_SPLIT_COUNT).is_some() { + gguf_file.header.metadata_kv_count -= 1; + } + if gguf_file.meta_kvs.get(LLM_KV_SPLIT_TENSORS_COUNT).is_some() { + gguf_file.header.metadata_kv_count -= 1; + } + + gguf_file.header.metadata_kv_count += ctx_gguf.get_header().metadata_kv_count; if self.no_tensor_first_split { - i_split += 1; ggufs.push(gguf_file); gguf_file = setup_gguf_file(i_split, n_tensors); } @@ -328,11 +284,6 @@ impl SplitArgs { } } -#[inline(always)] -const fn pad(pos: usize, align: usize) -> usize { - (align - pos % align) % align -} - fn split_path(path_prefix: &String, split_no: usize, split_count: usize) -> String { format!("{}-{:05}-of-{:05}.gguf", path_prefix, split_no, split_count) } @@ -342,8 +293,8 @@ mod tests { use super::*; #[test] fn it_works() { - let input = PathBuf::from("/home/qinyiqun/gguf/xtask/src/h2o-danube3-500m-chat-F16.gguf"); - let output = Some("/home/qinyiqun/gguf/xtask/src/test/rust/rust".to_string()); + let input = PathBuf::from("/home/qinyiqun/gguf/xtask/src/test/rust/rust_ori.gguf"); + let output = Some("/home/qinyiqun/gguf/xtask/src/test/rust/rust_ori_oi".to_string()); let split_args = SplitArgs { input, output, From 259e5db7bf0bfb60a0c7621691282fdb74790327 Mon Sep 17 00:00:00 2001 From: qinyiqun Date: Mon, 22 Jul 2024 11:42:49 +0800 Subject: [PATCH 3/3] update style and ban mutiple split --- ggus/src/metadata/mod.rs | 12 ++++++- xtask/src/gguf_file.rs | 6 ++-- xtask/src/merge.rs | 6 ++-- xtask/src/split.rs | 68 +++++++++++++++------------------------- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/ggus/src/metadata/mod.rs b/ggus/src/metadata/mod.rs index b8502fc..76e3697 100644 --- a/ggus/src/metadata/mod.rs +++ b/ggus/src/metadata/mod.rs @@ -1,4 +1,4 @@ -//! See . +//! See . mod general; mod llm; @@ -141,6 +141,16 @@ impl<'a> GGufMetaKVPairs<'a> { .map(|(&key, &len)| GGufMetaKV { key, len }) } + pub fn remove(&mut self, _key: &str) -> bool { + match self.indices.swap_remove_entry(_key) { + Some((_, v)) => { + self.nbytes -= v; + true + } + None => false, + } + } + fn get_typed( &self, name: impl AsRef, diff --git a/xtask/src/gguf_file.rs b/xtask/src/gguf_file.rs index 85eb130..ef1c86d 100644 --- a/xtask/src/gguf_file.rs +++ b/xtask/src/gguf_file.rs @@ -51,15 +51,15 @@ impl<'a> GGufFile<'a> { }) } - pub(crate) fn get_header(&self) -> &GGufFileHeader { + pub fn header(&self) -> &GGufFileHeader { &self.header } - pub(crate) fn get_meta_kvs(&self) -> &GGufMetaKVPairs<'a> { + pub fn meta_kvs(&self) -> &GGufMetaKVPairs<'a> { &self.meta_kvs } - pub(crate) fn get_tensors_as_indexmap(&self) -> IndexMap { + pub fn tensors_as_indexmap(&self) -> IndexMap { self.tensors .iter() .map(move |t| (t, self.data)) diff --git a/xtask/src/merge.rs b/xtask/src/merge.rs index a8b0e03..db39245 100644 --- a/xtask/src/merge.rs +++ b/xtask/src/merge.rs @@ -45,7 +45,7 @@ impl MergeArgs { let kvs = files .iter() - .flat_map(|file| file.get_meta_kvs().kvs()) + .flat_map(|file| file.meta_kvs().kvs()) .filter(|kv| { let key = kv.key(); !key.starts_with("split.") && key != "general.alignment" @@ -53,7 +53,7 @@ impl MergeArgs { .collect::>(); let tensors = files .iter() - .flat_map(|file| file.get_tensors_as_indexmap()) + .flat_map(|file| file.tensors_as_indexmap()) .collect::>(); let out = File::create(shards.single_file()).unwrap(); @@ -62,7 +62,7 @@ impl MergeArgs { let align = files .iter() - .map(|file| file.get_meta_kvs().alignment()) + .map(|file| file.meta_kvs().alignment()) .max() .unwrap(); diff --git a/xtask/src/split.rs b/xtask/src/split.rs index c01760b..fc55bc1 100644 --- a/xtask/src/split.rs +++ b/xtask/src/split.rs @@ -1,4 +1,7 @@ -use crate::gguf_file::{pad, GGufError, GGufFile}; +use crate::{ + gguf_file::{pad, GGufError, GGufFile}, + loose_shards::LooseShards, +}; use ggus::{GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufWriter}; use std::{fs::File, path::PathBuf}; @@ -50,6 +53,11 @@ impl<'a> GGufFileInfo<'a> { impl SplitArgs { pub fn split(self) { + let shards = LooseShards::from(&*self.input); + if shards.count() > 1 { + println!("Model has already been splited"); + return; + } let file = File::open(&self.input) .map_err(|e| { println!("Failed to open"); @@ -60,10 +68,10 @@ impl SplitArgs { let mmap = unsafe { memmap2::Mmap::map(&file).unwrap() }; let ctx_gguf: GGufFile = GGufFile::new(&mmap).unwrap(); - let align = ctx_gguf.get_meta_kvs().alignment(); + let align = ctx_gguf.meta_kvs().alignment(); let ggufs = self.split_strategy(ctx_gguf.clone()).unwrap(); - let tensors = ctx_gguf.get_tensors_as_indexmap(); + let tensors = ctx_gguf.tensors_as_indexmap(); let mut tensor_iter: indexmap::map::Iter = tensors.iter(); for gguf in ggufs { @@ -75,12 +83,6 @@ impl SplitArgs { let kvs = gguf.meta_kvs.kvs(); for kv in kvs { - if kv.key() == LLM_KV_SPLIT_TENSORS_COUNT - || kv.key() == LLM_KV_SPLIT_COUNT - || kv.key() == LLM_KV_SPLIT_NO - { - continue; - } writer .write_meta_kv(kv.key(), kv.ty(), kv.value_bytes()) .unwrap(); @@ -197,10 +199,10 @@ impl SplitArgs { }, } - let tensors = ctx_gguf.get_tensors_as_indexmap(); + let tensors = ctx_gguf.tensors_as_indexmap(); let mut ggufs: Vec = Vec::new(); - let n_tensors: u64 = ctx_gguf.get_header().tensor_count; + let n_tensors: u64 = ctx_gguf.header().tensor_count; let setup_gguf_file = |i_split: u64, n_tensors: u64| { let mut gguf_file = GGufFileInfo::new_empty(); @@ -221,20 +223,20 @@ impl SplitArgs { let mut i_split: u64 = 1; let mut gguf_file = setup_gguf_file(i_split, n_tensors); - gguf_file.meta_kvs = ctx_gguf.get_meta_kvs().clone(); + gguf_file.meta_kvs = ctx_gguf.meta_kvs().clone(); + + gguf_file.header.metadata_kv_count += ctx_gguf.header().metadata_kv_count; - if gguf_file.meta_kvs.get(LLM_KV_SPLIT_NO).is_some() { + if gguf_file.meta_kvs.remove(LLM_KV_SPLIT_NO) { gguf_file.header.metadata_kv_count -= 1; } - if gguf_file.meta_kvs.get(LLM_KV_SPLIT_COUNT).is_some() { + if gguf_file.meta_kvs.remove(LLM_KV_SPLIT_COUNT) { gguf_file.header.metadata_kv_count -= 1; } - if gguf_file.meta_kvs.get(LLM_KV_SPLIT_TENSORS_COUNT).is_some() { + if gguf_file.meta_kvs.remove(LLM_KV_SPLIT_TENSORS_COUNT) { gguf_file.header.metadata_kv_count -= 1; } - gguf_file.header.metadata_kv_count += ctx_gguf.get_header().metadata_kv_count; - if self.no_tensor_first_split { ggufs.push(gguf_file); gguf_file = setup_gguf_file(i_split, n_tensors); @@ -267,7 +269,12 @@ impl SplitArgs { let output_path = &self.output.unwrap(); let mut index = 0; while index < ggufs.len() { - ggufs[index].output_path = split_path(output_path, index + 1, ggufs.len()); + ggufs[index].output_path = format!( + "{}-{:05}-of-{:05}.gguf", + output_path, + index + 1, + ggufs.len() + ); ggufs[index].new_kv_tuples[1] = (LLM_KV_SPLIT_COUNT.to_string(), ty::U16, tensor_count); index += 1; } @@ -283,28 +290,3 @@ impl SplitArgs { } } } - -fn split_path(path_prefix: &String, split_no: usize, split_count: usize) -> String { - format!("{}-{:05}-of-{:05}.gguf", path_prefix, split_no, split_count) -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn it_works() { - let input = PathBuf::from("/home/qinyiqun/gguf/xtask/src/test/rust/rust_ori.gguf"); - let output = Some("/home/qinyiqun/gguf/xtask/src/test/rust/rust_ori_oi".to_string()); - let split_args = SplitArgs { - input, - output, - split_max_tensors: None, - split_max_size: Some("300M".to_string()), - no_tensor_first_split: true, - n_bytes_split: 0, - n_split_tensors: 0, - }; - - split_args.split(); - } -}