Skip to content

Commit

Permalink
Work towards packfiles + improve oid resolving
Browse files Browse the repository at this point in the history
  • Loading branch information
ttrssreal committed May 13, 2024
1 parent d21dcdf commit c5b4d9e
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 227 deletions.
2 changes: 1 addition & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ pub struct CatFileMode {

#[arg(short = 't')]
pub kind: bool,
}
}
70 changes: 12 additions & 58 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,73 +1,27 @@
mod object;
mod store;
mod cli;

use std::fs::{self, DirEntry};
use cli::{Cli, Commands};
use clap::Parser;
use std::io::Write;
use hex;

use object::GitObjectStore;
use store::GitObjectStore;
use crate::cli::CatFileArgs;
use crate::store::util::resolve_id;

pub const MIN_USER_HASH_LEN: usize = 4;
pub const SHA1_HASH_SIZE: usize = 20;

/// Resolves a full or abbreviated hexadecimal object id to the raw 20-byte id
/// of the single loose object under `.git/objects` whose name starts with it.
///
/// `id_str` must be between `MIN_USER_HASH_LEN` and `SHA1_HASH_SIZE * 2`
/// characters long and consist only of ASCII hex digits.
///
/// Returns `None` (after printing a short diagnostic to stderr where one
/// applies) when the input is malformed, when no loose object matches, when
/// more than one loose object matches, or when the matching file name is not a
/// well-formed object name.
fn hash_from_str(id_str: &str) -> Option<[u8; 20]> {
    let id_len = id_str.len();

    if id_len < MIN_USER_HASH_LEN || id_len > SHA1_HASH_SIZE * 2 {
        eprintln!("Invalid hash length.");
        return None;
    }

    // Validate before slicing or touching the filesystem: non-ASCII input would
    // make the byte-index split below panic on a char boundary, and characters
    // such as '.' or '/' would let the caller steer the directory lookup.
    if !id_str.bytes().all(|b| b.is_ascii_hexdigit()) {
        eprintln!("Invalid hash.");
        return None;
    }

    // Loose objects live at .git/objects/<first two hex digits>/<remaining digits>.
    let (dir_part, file_prefix) = id_str.split_at(2);
    let obj_dir = format!(".git/objects/{}/", dir_part);

    let Ok(contents) = fs::read_dir(obj_dir) else {
        eprintln!("Invalid hash.");
        return None;
    };

    // Entries that cannot be read, or whose names are not valid UTF-8, cannot be
    // object files we are able to name, so they are skipped instead of panicking.
    let mut matches = contents
        .filter_map(Result::ok)
        .filter_map(|entry| entry.file_name().into_string().ok())
        .filter(|name| name.starts_with(file_prefix));

    let Some(found) = matches.next() else {
        eprintln!("Can't find hash.");
        return None;
    };

    if matches.next().is_some() {
        eprintln!("Can't disambiguate hash.");
        return None;
    }

    let full_id = format!("{}{}", dir_part, found);

    // The file name comes from disk, so check it as strictly as the user input.
    if full_id.len() != SHA1_HASH_SIZE * 2 || !full_id.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    let mut id = [0u8; SHA1_HASH_SIZE];
    for (byte, pair) in id.iter_mut().zip(full_id.as_bytes().chunks_exact(2)) {
        let pair = std::str::from_utf8(pair).ok()?;
        *byte = u8::from_str_radix(pair, 16).ok()?;
    }

    Some(id)
}

fn main() -> Result<(), Box<dyn std::error::Error>> {

let cli = Cli::parse();

match cli.command {
Commands::CatFile(cat_file_args) => {
let id = hash_from_str(&cat_file_args.id).ok_or("Failed.")?;
Commands::CatFile(CatFileArgs {
mode,
id
}) => {
let id = resolve_id(&id).ok_or("Failed.")?;

let obj = match GitObjectStore::get(id) {
Some(obj) => obj,
Expand All @@ -76,12 +30,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {

let mut stdout = std::io::stdout();

if cat_file_args.mode.print {
if mode.print {
print!("{}", obj);
}

if cat_file_args.mode.kind {
print!("{}\n", obj.type_string());
if mode.kind {
print!("{}\n", obj.type_str());
}

stdout.flush()?;
Expand Down
221 changes: 53 additions & 168 deletions src/object.rs → src/store/loose.rs
Original file line number Diff line number Diff line change
@@ -1,129 +1,17 @@
use std::fmt::Display;
use std::fs::File;
use compress;
use std::io::Read;
use std::iter::{Peekable, Iterator};
use std::iter::Peekable;
use std::collections::HashMap;
use std::option::Option;
use crate::store::{
GitObject,
GitObjectData,
GitObjectStore,
TreeEntry,
ObjectId
};

use crate::SHA1_HASH_SIZE;

// "Each entry has a sha1 identifier, pathname and mode."
/// One entry of a tree object.
#[derive(Debug, PartialEq)]
pub struct TreeEntry {
/// Numeric mode of the entry; parsed from octal text and printed as six octal digits.
pub mode: u32,
/// Type name of the object this entry points to ("blob", "tree", ...).
pub kind: String,
/// Path name of the entry.
pub path: String,
/// Raw SHA-1 id of the object this entry points to.
pub id: [u8; SHA1_HASH_SIZE],
}

/// Parsed payload of a git object, one variant per object type.
#[derive(Debug, PartialEq)]
pub enum GitObjectData {
/// File content, kept as raw bytes.
Blob {
data: Vec<u8>,
},
/// Directory listing made of tree entries.
Tree {
entries: Vec<TreeEntry>,
},
/// Commit headers plus the commit message.
Commit {
// Raw id taken from the "tree" header.
tree: [u8; SHA1_HASH_SIZE],
// Raw ids taken from the "parent" headers; empty when the commit has none.
parents: Vec<[u8; SHA1_HASH_SIZE]>,
// https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors
// assuming git doesn't support multiple authors/committers
author: String,
committer: String,
// Optional headers: only printed back when present.
encoding: Option<String>,
gpgsig: Option<String>,
// Message body as raw bytes; rendered lossily as UTF-8 when displayed.
message: Vec<u8>,
},
/// Tag headers plus the tag message.
Tag {
// Raw id taken from the "object" header.
object: [u8; SHA1_HASH_SIZE],
// Printed as the "type" header.
kind: String,
tag: String,
tagger: String,
// If signed the signature resides in the message itself
message: Vec<u8>,
},

}

/// A git object loaded from the object store: its id, declared size and parsed payload.
#[derive(Debug)]
pub struct GitObject {
/// Raw SHA-1 id the object was looked up by.
pub id: [u8; SHA1_HASH_SIZE],
/// Byte size as declared in the object's header.
pub size: usize,
/// Parsed, type-specific content.
pub data: GitObjectData,
}

impl GitObject {
    /// Returns the git type name of this object: "blob", "tree", "commit" or "tag".
    pub fn type_string(&self) -> String {
        let name = match &self.data {
            GitObjectData::Blob { .. } => "blob",
            GitObjectData::Tree { .. } => "tree",
            GitObjectData::Commit { .. } => "commit",
            GitObjectData::Tag { .. } => "tag",
        };

        String::from(name)
    }
}

impl Display for TreeEntry {
    /// Formats the entry as `<mode> <kind> <hex id> <path>`, with the mode
    /// zero-padded to six octal digits.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self { mode, kind, path, id } = self;
        let id_hex = hex::encode(id);

        write!(f, "{:06o} {} {} {}", *mode, kind, id_hex, path)
    }
}

impl Display for GitObject {
    /// Renders the object's payload as text.
    ///
    /// Blobs are printed as lossily decoded UTF-8, trees as one entry per
    /// line, and commits and tags as their header lines, a blank line, and
    /// then the lossily decoded message.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match &self.data {
            GitObjectData::Blob { data } => {
                let text = String::from_utf8_lossy(data);
                write!(f, "{}", text)
            }
            GitObjectData::Tree { entries } => entries
                .iter()
                .try_for_each(|entry| writeln!(f, "{}", entry)),
            GitObjectData::Commit {
                tree,
                parents,
                author,
                committer,
                encoding,
                gpgsig,
                message,
            } => {
                writeln!(f, "tree {}", hex::encode(tree))?;
                parents
                    .iter()
                    .try_for_each(|parent| writeln!(f, "parent {}", hex::encode(parent)))?;
                writeln!(f, "author {}", author)?;
                writeln!(f, "committer {}", committer)?;

                // Optional headers appear only when the commit carried them.
                if let Some(value) = encoding {
                    writeln!(f, "encoding {}", value)?;
                }
                if let Some(value) = gpgsig {
                    writeln!(f, "gpgsig {}", value)?;
                }

                // A blank line separates the headers from the message body.
                writeln!(f)?;
                write!(f, "{}", String::from_utf8_lossy(message))
            }
            GitObjectData::Tag {
                object,
                kind,
                tag,
                tagger,
                message,
            } => {
                writeln!(f, "object {}", hex::encode(object))?;
                writeln!(f, "type {}", kind)?;
                writeln!(f, "tag {}", tag)?;
                writeln!(f, "tagger {}", tagger)?;

                // A blank line separates the headers from the message body.
                writeln!(f)?;
                write!(f, "{}", String::from_utf8_lossy(message))
            }
        }
    }
}

fn parse_header<'a, I>(data: &mut Peekable<I>) -> Option<(String, String)>
where
I: Iterator<Item = &'a u8>
Expand Down Expand Up @@ -190,8 +78,9 @@ where

let path = path.to_string();
let mode = u32::from_str_radix(mode, 8).ok()?;
let id: [u8; SHA1_HASH_SIZE] = id.try_into().ok()?;
let kind = GitObjectStore::get(id)?.type_string();

let id: ObjectId = id.as_slice().try_into().ok()?;
let kind = GitObjectStore::get(id)?.type_str().to_string();

Some(TreeEntry {
mode,
Expand Down Expand Up @@ -242,14 +131,14 @@ fn parse_commit(data: &[u8]) -> Option<GitObjectData> {
// Decode the tree hash
let tree_headers = headers.get("tree")?;
let tree = hex::decode(tree_headers.first()?).ok()?;
let tree: [u8; SHA1_HASH_SIZE] = tree.try_into().ok()?;
let tree: ObjectId = tree.as_slice().try_into().ok()?;

// Decode all the parent hashes
let mut parents = Vec::new();
if let Some(pv) = headers.get("parent") {
for p in pv {
let decoded = hex::decode(p).ok()?;
parents.push(decoded.try_into().ok()?)
parents.push(decoded.as_slice().try_into().ok()?)
}
}

Expand Down Expand Up @@ -322,8 +211,8 @@ fn parse_tag(data: &[u8]) -> Option<GitObjectData> {
let tagger = headers.get("tagger")?
.first()?.to_string();

let object: [u8; SHA1_HASH_SIZE] = hex::decode(object)
.ok()?.try_into().ok()?;
let object: ObjectId = hex::decode(object)
.ok()?.as_slice().try_into().ok()?;

// Eat final newline before message body
if *data.next()? != b'\n' {
Expand All @@ -341,48 +230,44 @@ fn parse_tag(data: &[u8]) -> Option<GitObjectData> {
})
}

pub struct GitObjectStore;

impl GitObjectStore {
pub fn get(id: [u8; SHA1_HASH_SIZE]) -> Option<GitObject> {
let id_str = hex::encode(id);

let obj_path = format!(".git/objects/{}/{}", &id_str[..2], &id_str[2..]);
let obj_stream = File::open(obj_path).ok()?;

// Raw object
let mut data = Vec::new();

// Decompress
compress::zlib::Decoder::new(obj_stream)
.read_to_end(&mut data).ok()?;

// Git object TLV encoding:
// <obj-type> ' ' <byte-size> '\0' <object-data>
let [header, data] = data.splitn(2, |&b| b == b'\0')
.by_ref().collect::<Vec<&[u8]>>()[..] else {
return None;
};

let [kind, size] = header.splitn(2, |&b| b == b' ')
.by_ref().collect::<Vec<&[u8]>>()[..] else {
return None;
};

let size = String::from_utf8_lossy(size).parse::<usize>().ok()?;

let data = match kind {
b"blob" => parse_blob(data)?,
b"commit" => parse_commit(data)?,
b"tree" => parse_tree(data)?,
b"tag" => parse_tag(data)?,
_ => return None
pub fn get_loose_object(id: ObjectId) -> Option<GitObject> {
let id_str = id.to_string();

let obj_path = format!(".git/objects/{}/{}", &id_str[..2], &id_str[2..]);
let obj_stream = File::open(obj_path).ok()?;

// Raw object
let mut data = Vec::new();

// Decompress
compress::zlib::Decoder::new(obj_stream)
.read_to_end(&mut data).ok()?;

// Git object TLV encoding:
// <obj-type> ' ' <byte-size> '\0' <object-data>
let [header, data] = data.splitn(2, |&b| b == b'\0')
.by_ref().collect::<Vec<&[u8]>>()[..] else {
return None;
};

Some(GitObject {
id,
size,
data,
})
}
let [kind, size] = header.splitn(2, |&b| b == b' ')
.by_ref().collect::<Vec<&[u8]>>()[..] else {
return None;
};

let size = String::from_utf8_lossy(size).parse::<usize>().ok()?;

let data = match kind {
b"blob" => parse_blob(data)?,
b"commit" => parse_commit(data)?,
b"tree" => parse_tree(data)?,
b"tag" => parse_tag(data)?,
_ => return None
};

Some(GitObject {
id,
size,
data,
})
}
Loading

0 comments on commit c5b4d9e

Please sign in to comment.