Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smarter git handling #2

Merged
merged 5 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Mohsen Azimi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ You can place a file called `yek.toml` at your project root or pass a custom pat
1. Add custom ignore patterns
2. Define file priority rules for processing order
3. Add additional binary file extensions to ignore (extends the built-in list)
4. Configure Git-based priority boost

Example configuration:

Expand All @@ -160,6 +161,9 @@ patterns = [
"my_custom_folder/"
]

# Configure Git-based priority boost (optional)
git_boost_max = 50 # Maximum score boost based on Git history (default: 100)

# Define priority rules for processing order
# Higher scores are processed first
[[priority_rules]]
Expand Down Expand Up @@ -188,6 +192,7 @@ All configuration keys are optional. By default:

- No extra ignore patterns
- All files have equal priority (score: 1)
- The maximum Git-based priority boost (`git_boost_max`) is 100
- Common binary file extensions are ignored (.jpg, .png, .exe, etc. - see source for full list)

## Planned Features
Expand Down
66 changes: 51 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use std::fs::{self, File, OpenOptions};
use std::io::{self, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Command as SysCommand, Stdio};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tracing::{debug, info};
use walkdir::WalkDir;

Expand Down Expand Up @@ -47,6 +46,8 @@ pub struct YekConfig {
pub binary_extensions: Vec<String>,
#[serde(default)]
pub output_dir: Option<String>,
#[serde(default)]
pub git_boost_max: Option<i32>,
}

#[derive(Debug, Deserialize, Default, Clone)]
Expand Down Expand Up @@ -88,6 +89,7 @@ impl Default for YekConfig {
],
binary_extensions: Vec::new(), // User extensions only, we'll combine with BINARY_FILE_EXTENSIONS
output_dir: None,
git_boost_max: None,
}
}
}
Expand Down Expand Up @@ -115,6 +117,7 @@ fn default_priority_list() -> Vec<PriorityPattern> {
/// Default sets of ignore patterns (separate from .gitignore)
fn default_ignore_patterns() -> Vec<Regex> {
let raw = vec![
r"^LICENSE$",
r"^\.git/",
r"^\.next/",
r"^node_modules/",
Expand Down Expand Up @@ -485,6 +488,15 @@ pub fn serialize_repo(
// Get git commit times if available
let commit_times = get_recent_commit_times(base_path);

// If we have commit times, compute a "recentness" map
// that ranks all files from oldest to newest.
let recentness_boost = if let Some(ref times) = commit_times {
let max_boost = config.as_ref().and_then(|c| c.git_boost_max).unwrap_or(100);
Some(compute_recentness_boost(times, max_boost))
} else {
None
};

// Build gitignore matcher
let mut builder = GitignoreBuilder::new(base_path);
let gitignore_path = base_path.join(".gitignore");
Expand Down Expand Up @@ -584,20 +596,10 @@ pub fn serialize_repo(
&final_config.priority_list,
);

// Boost priority for recently modified files
if let Some(ref times) = commit_times {
if let Some(ts) = times.get(&pattern_path) {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_else(|_| Duration::from_secs(0))
.as_secs();
let age = now.saturating_sub(*ts);
if age < 60 * 60 * 24 * 7 {
// Files modified in last week get priority boost
// Add boost based on how recent the file is
let boost = 100 + ((60 * 60 * 24 * 7 - age) / (60 * 60)) as i32;
priority += boost;
}
// Apply rank-based boost if available
if let Some(ref boost_map) = recentness_boost {
if let Some(boost) = boost_map.get(&pattern_path) {
priority += *boost;
}
}

Expand Down Expand Up @@ -789,3 +791,37 @@ pub fn load_config_file(path: &Path) -> Option<YekConfig> {
}
}
}

/// Computes a rank-based "recentness" boost for every file in `commit_times`.
///
/// Files are ordered by ascending commit timestamp. The oldest file receives a
/// boost of `0`, the newest receives `max_boost`, and everything in between is
/// spaced linearly by rank (`rank = index / (len - 1)`), rounded to the nearest
/// integer.
///
/// Files sharing the same timestamp (e.g. touched by the same commit) are
/// ordered by path. Without that tie-break their relative rank would follow
/// `HashMap` iteration order, which differs between runs, so the same
/// repository could produce different priorities each time.
///
/// # Arguments
/// * `commit_times` - map from file path to its commit timestamp.
/// * `max_boost` - boost assigned to the most recent file.
///
/// # Returns
/// A map from file path to boost. Empty input yields an empty map; a single
/// file yields a boost of `0`, since there is nothing to rank it against.
fn compute_recentness_boost(
    commit_times: &HashMap<String, u64>,
    max_boost: i32,
) -> HashMap<String, i32> {
    // Oldest first; the path is a secondary key so equal timestamps rank deterministically.
    let mut sorted: Vec<(&String, u64)> = commit_times
        .iter()
        .map(|(path, time)| (path, *time))
        .collect();
    sorted.sort_unstable_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(b.0)));

    let last_index = sorted.len().saturating_sub(1);
    if last_index == 0 {
        // Zero or one file: a relative rank is meaningless, so nothing is boosted.
        return sorted
            .into_iter()
            .map(|(path, _)| (path.clone(), 0))
            .collect();
    }

    let last_index = last_index as f64;
    let max_boost = f64::from(max_boost);

    sorted
        .into_iter()
        .enumerate()
        .map(|(index, (path, _))| {
            let rank = index as f64 / last_index; // 0.0 (oldest) ..= 1.0 (newest)
            (path.clone(), (rank * max_boost).round() as i32)
        })
        .collect()
}
20 changes: 20 additions & 0 deletions yek.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Top-level keys must come before the first table header: a key written after
# a `[table]` or `[[array-of-tables]]` header belongs to that table instead.
output_dir = "./repo-serialized"

[ignore_patterns]
# Skip the directory named by `output_dir` above.
patterns = [
    "^repo-serialized/"
]

# Priority rules: each entry assigns `score` to paths matching any of its patterns.
[[priority_rules]]
score = 100
patterns = ["^src/"]

[[priority_rules]]
score = 70
patterns = ["^src/lib/", "^test/"]

[[priority_rules]]
score = 30
patterns = ["^scripts/"]
Loading