Skip to content

Commit

Permalink
Merge pull request #1 from qinyiqun/AddGGuFSplit
Browse files Browse the repository at this point in the history
add split
  • Loading branch information
YdrMaster authored Jul 24, 2024
2 parents a3f8667 + 259e5db commit b7b1f1f
Show file tree
Hide file tree
Showing 8 changed files with 398 additions and 69 deletions.
2 changes: 1 addition & 1 deletion ggus/src/header.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::sizeof;
use std::str::Utf8Error;

#[derive(Default, Debug)]
#[derive(Default, Debug, Clone)]
#[repr(C)]
pub struct GGufFileHeader {
magic: [u8; 4],
Expand Down
20 changes: 19 additions & 1 deletion ggus/src/metadata/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! See <https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-key-value-pairs>.
//! See <https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#standardized-key-value-pairs>.
mod general;
mod llm;
Expand Down Expand Up @@ -82,6 +82,14 @@ pub struct GGufMetaKVPairs<'a> {
}

impl<'a> GGufMetaKVPairs<'a> {
/// Creates a key-value collection that holds no entries yet.
///
/// `nbytes_` is stored verbatim as the collection's byte count; the
/// index map starts out empty.
pub fn new(nbytes_: usize) -> Self {
    Self {
        indices: IndexMap::new(),
        nbytes: nbytes_,
    }
}

pub fn scan(count: u64, data: &'a [u8]) -> Result<Self, GGufReadError<'a>> {
let mut reader = GGufReader::new(data);
let mut indices = IndexMap::with_capacity(count as _);
Expand Down Expand Up @@ -133,6 +141,16 @@ impl<'a> GGufMetaKVPairs<'a> {
.map(|(&key, &len)| GGufMetaKV { key, len })
}

/// Removes the entry stored under `_key`, if there is one.
///
/// Returns `true` when an entry was removed; in that case the value
/// recorded for it in the index is subtracted from `self.nbytes`.
/// Returns `false`, changing nothing, when the key is absent.
///
/// Removal goes through `swap_remove_entry`, so the relative order of
/// the remaining entries is not guaranteed to be preserved.
pub fn remove(&mut self, _key: &str) -> bool {
    if let Some((_, len)) = self.indices.swap_remove_entry(_key) {
        self.nbytes -= len;
        true
    } else {
        false
    }
}

fn get_typed(
&self,
name: impl AsRef<str>,
Expand Down
8 changes: 8 additions & 0 deletions ggus/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ pub struct GGufTensors<'a> {
}

impl<'a> GGufTensors<'a> {
/// Creates a tensor collection that holds no entries yet.
///
/// `nbytes_` is stored verbatim as the collection's byte count; the
/// index map starts out empty.
pub fn new(nbytes_: usize) -> Self {
    Self {
        indices: IndexMap::<&str, ()>::new(),
        nbytes: nbytes_,
    }
}

pub fn scan(count: u64, data: &'a [u8]) -> Result<Self, GGufReadError<'a>> {
let mut reader = GGufReader::new(data);
let mut indices = IndexMap::with_capacity(count as _);
Expand Down
6 changes: 3 additions & 3 deletions ggus/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ pub struct GGufWriter<T: Write>(BufWriter<T>, usize);
impl<T: Write> GGufWriter<T> {
#[inline]
pub fn new(writer: T, header: GGufFileHeader) -> Result<Self> {
let mut buf = BufWriter::new(writer);
buf.write_all(as_slice(&header))?;
Ok(Self(buf, 0))
let mut ans = Self(BufWriter::new(writer), 0);
ans.write_bytes(as_slice(&header))?;
Ok(ans)
}

#[inline]
Expand Down
73 changes: 73 additions & 0 deletions xtask/src/gguf_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use ggus::{GGufFileHeader, GGufMetaKVPairs, GGufReadError, GGufTensors};
use indexmap::IndexMap;

/// A GGUF file interpreted in place over a borrowed byte slice.
///
/// Instances are built by `GGufFile::new`, which fills the fields in
/// declaration order while walking the input.
#[derive(Clone)]
pub(crate) struct GGufFile<'a> {
/// Header value copied out of the first bytes of the input.
header: GGufFileHeader,
/// Result of `GGufMetaKVPairs::scan` over the bytes that follow the header.
meta_kvs: GGufMetaKVPairs<'a>,
/// Result of `GGufTensors::scan` over the bytes that follow the metadata.
tensors: GGufTensors<'a>,
/// Remainder of the input, starting at the end of the tensor section
/// rounded up to a multiple of `meta_kvs.alignment()`.
data: &'a [u8],
}

/// Errors reported while interpreting a byte slice as a GGUF file.
#[derive(Debug)]
pub(crate) enum GGufError<'a> {
/// `GGufFileHeader::is_magic_correct` returned `false`.
MagicMismatch,
/// `GGufFileHeader::is_native_endian` returned `false`.
EndianNotSupport,
/// The header's `version` field is not 3.
VersionNotSupport,
/// `GGufMetaKVPairs::scan` or `GGufTensors::scan` failed; carries the
/// reader's error.
// NOTE(review): the lint allowance suggests the payload is only ever
// observed through the derived `Debug` output — confirm.
#[allow(dead_code)]
Reading(GGufReadError<'a>),
/// NOTE(review): presumably the file size is inconsistent with what its
/// contents require — confirm against the code that raises it.
FileSizeError,
/// NOTE(review): presumably a split mode was specified more than once;
/// raised outside this file — confirm against the caller.
SplitModeRepeated,
}

impl<'a> GGufFile<'a> {
    /// Interprets `data` as a version-3, native-endian GGUF file.
    ///
    /// The input is walked front to back: header, metadata key-value
    /// pairs, tensor infos, then the tensor-data region, which starts at
    /// the next multiple of `meta_kvs.alignment()`.
    ///
    /// # Errors
    ///
    /// * `FileSizeError` — `data` is too short to hold a header, or ends
    ///   before one of the sections located above begins.
    /// * `MagicMismatch` — the header magic is wrong.
    /// * `EndianNotSupport` — the header is not in native byte order.
    /// * `VersionNotSupport` — the header version is not 3.
    /// * `Reading` — scanning the metadata or the tensor infos failed.
    pub(crate) fn new(data: &'a [u8]) -> Result<Self, GGufError<'a>> {
        // The header is copied straight out of the byte slice, so the slice
        // has to cover a complete header before anything is read.
        if data.len() < std::mem::size_of::<GGufFileHeader>() {
            return Err(GGufError::FileSizeError);
        }
        // SAFETY: the length check above guarantees that
        // `size_of::<GGufFileHeader>()` bytes are readable from
        // `data.as_ptr()`. A `&[u8]` carries no alignment guarantee, which
        // is why `read_unaligned` is used instead of `read`. The header is
        // treated as plain data, exactly as the previous `read` did.
        let header = unsafe { data.as_ptr().cast::<GGufFileHeader>().read_unaligned() };
        if !header.is_magic_correct() {
            return Err(GGufError::MagicMismatch);
        }
        if !header.is_native_endian() {
            return Err(GGufError::EndianNotSupport);
        }
        if header.version != 3 {
            return Err(GGufError::VersionNotSupport);
        }

        // Checked slicing: a truncated file yields an error, not a panic.
        let cursor = header.nbytes();
        let rest = data.get(cursor..).ok_or(GGufError::FileSizeError)?;
        let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, rest)
            .map_err(GGufError::Reading)?;

        let cursor = cursor + meta_kvs.nbytes();
        let rest = data.get(cursor..).ok_or(GGufError::FileSizeError)?;
        let tensors =
            GGufTensors::scan(header.tensor_count, rest).map_err(GGufError::Reading)?;

        // Round the end of the tensor section up to the file's alignment.
        let align = meta_kvs.alignment();
        let cursor = (cursor + tensors.nbytes() + align - 1) / align * align;
        let data = data.get(cursor..).ok_or(GGufError::FileSizeError)?;
        Ok(Self {
            header,
            meta_kvs,
            tensors,
            data,
        })
    }

    /// Returns the header that was read from the start of the file.
    pub fn header(&self) -> &GGufFileHeader {
        &self.header
    }

    /// Returns the metadata key-value pairs scanned from the file.
    pub fn meta_kvs(&self) -> &GGufMetaKVPairs<'a> {
        &self.meta_kvs
    }

    /// Collects every tensor info into a map.
    ///
    /// Each value is the file's whole tensor-data region (`self.data`),
    /// not a per-tensor sub-slice.
    pub fn tensors_as_indexmap(&self) -> IndexMap<ggus::GGufTensorInfo, &[u8]> {
        self.tensors
            .iter()
            .map(move |t| (t, self.data))
            .collect::<IndexMap<_, _>>()
    }
}

/// Returns how many bytes must follow `pos` to reach the next multiple of
/// `align`; the result is 0 when `pos` is already a multiple.
///
/// Panics when `align` is 0 (remainder by zero).
#[inline(always)]
pub(crate) const fn pad(pos: usize, align: usize) -> usize {
    match pos % align {
        0 => 0,
        rem => align - rem,
    }
}
1 change: 1 addition & 0 deletions xtask/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod gguf_file;
mod loose_shards;
mod merge;
mod show;
Expand Down
70 changes: 10 additions & 60 deletions xtask/src/merge.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::loose_shards::LooseShards;
use ggus::{
GGufFileHeader, GGufMetaDataValueType, GGufMetaKVPairs, GGufReadError, GGufTensors, GGufWriter,
use crate::{
gguf_file::{pad, GGufFile},
loose_shards::LooseShards,
};
use ggus::{GGufFileHeader, GGufMetaDataValueType, GGufWriter};
use indexmap::{IndexMap, IndexSet};
use std::{fs::File, iter::zip, path::PathBuf, thread};

Expand Down Expand Up @@ -44,32 +45,32 @@ impl MergeArgs {

let kvs = files
.iter()
.flat_map(|file| file.meta_kvs.kvs())
.flat_map(|file| file.meta_kvs().kvs())
.filter(|kv| {
let key = kv.key();
!key.starts_with("split.") && key != "general.alignment"
})
.collect::<IndexSet<_>>();
let tensors = files
.iter()
.flat_map(|file| file.tensors.iter().map(move |t| (t, file.data)))
.flat_map(|file| file.tensors_as_indexmap())
.collect::<IndexMap<_, _>>();

let out = File::create(shards.single_file()).unwrap();
let header = GGufFileHeader::new(3, tensors.len() as _, (kvs.len() + 1) as _);
let mut writer: GGufWriter<File> = GGufWriter::new(out, header).unwrap();
let mut writer = GGufWriter::new(out, header).unwrap();

let align = files
.iter()
.map(|file| file.meta_kvs.alignment())
.map(|file| file.meta_kvs().alignment())
.max()
.unwrap();

writer
.write_meta_kv(
"general.alignment",
GGufMetaDataValueType::U64,
(align as u64).to_le_bytes(),
GGufMetaDataValueType::U32,
(align as u32).to_le_bytes(),
)
.unwrap();

Expand Down Expand Up @@ -108,54 +109,3 @@ impl MergeArgs {
}
}
}

/// Returns how many bytes must follow `pos` to reach the next multiple of
/// `align`; the result is 0 when `pos` is already a multiple.
///
/// Panics when `align` is 0 (remainder by zero).
#[inline(always)]
const fn pad(pos: usize, align: usize) -> usize {
    match pos % align {
        0 => 0,
        rem => align - rem,
    }
}

/// A GGUF file interpreted in place over a borrowed byte slice.
///
/// Instances are built by `GGufFile::new`.
struct GGufFile<'a> {
/// Result of `GGufMetaKVPairs::scan` over the bytes that follow the header.
meta_kvs: GGufMetaKVPairs<'a>,
/// Result of `GGufTensors::scan` over the bytes that follow the metadata.
tensors: GGufTensors<'a>,
/// Remainder of the input, starting at the end of the tensor section
/// rounded up to a multiple of `meta_kvs.alignment()`.
data: &'a [u8],
}

/// Errors reported while interpreting a byte slice as a GGUF file.
#[derive(Debug)]
enum GGufError<'a> {
/// `GGufFileHeader::is_magic_correct` returned `false`.
MagicMismatch,
/// `GGufFileHeader::is_native_endian` returned `false`.
EndianNotSupport,
/// The header's `version` field is not 3.
VersionNotSupport,
/// `GGufMetaKVPairs::scan` or `GGufTensors::scan` failed; carries the
/// reader's error.
// NOTE(review): the lint allowance suggests the payload is only ever
// observed through the derived `Debug` output — confirm.
#[allow(dead_code)]
Reading(GGufReadError<'a>),
}

impl<'a> GGufFile<'a> {
    /// Interprets `data` as a version-3, native-endian GGUF file.
    ///
    /// The input is walked front to back: header, metadata key-value
    /// pairs, tensor infos, then the tensor-data region, which starts at
    /// the next multiple of `meta_kvs.alignment()`.
    ///
    /// # Errors
    ///
    /// * `MagicMismatch` — the header magic is wrong, or `data` is too
    ///   short to hold a header at all (this error type has no dedicated
    ///   size variant).
    /// * `EndianNotSupport` — the header is not in native byte order.
    /// * `VersionNotSupport` — the header version is not 3.
    /// * `Reading` — scanning the metadata or the tensor infos failed.
    ///
    /// # Panics
    ///
    /// Slice indexing panics if `data` ends before one of the sections
    /// located above begins.
    fn new(data: &'a [u8]) -> Result<Self, GGufError<'a>> {
        // The header is copied straight out of the byte slice, so the slice
        // has to cover a complete header before anything is read.
        if data.len() < std::mem::size_of::<GGufFileHeader>() {
            return Err(GGufError::MagicMismatch);
        }
        // SAFETY: the length check above guarantees that
        // `size_of::<GGufFileHeader>()` bytes are readable from
        // `data.as_ptr()`. A `&[u8]` carries no alignment guarantee, which
        // is why `read_unaligned` is used instead of `read`. The header is
        // treated as plain data, exactly as the previous `read` did.
        let header = unsafe { data.as_ptr().cast::<GGufFileHeader>().read_unaligned() };
        if !header.is_magic_correct() {
            return Err(GGufError::MagicMismatch);
        }
        if !header.is_native_endian() {
            return Err(GGufError::EndianNotSupport);
        }
        if header.version != 3 {
            return Err(GGufError::VersionNotSupport);
        }

        let cursor = header.nbytes();
        let meta_kvs = GGufMetaKVPairs::scan(header.metadata_kv_count, &data[cursor..])
            .map_err(GGufError::Reading)?;

        let cursor = cursor + meta_kvs.nbytes();
        let tensors =
            GGufTensors::scan(header.tensor_count, &data[cursor..]).map_err(GGufError::Reading)?;

        // Round the end of the tensor section up to the file's alignment.
        let align = meta_kvs.alignment();
        let cursor = (cursor + tensors.nbytes() + align - 1) / align * align;
        Ok(Self {
            meta_kvs,
            tensors,
            data: &data[cursor..],
        })
    }
}
Loading

0 comments on commit b7b1f1f

Please sign in to comment.