Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Ignore_word_pairs #79

Merged
merged 2 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/file/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use super::content::wikilink::Alias;
/// # Example
/// `asdf/Foo___Bar.md` -> `Foo___Bar`
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Filename(String);
pub struct Filename(pub String);

impl Filename {
#[must_use]
Expand All @@ -47,7 +47,7 @@ impl From<String> for Filename {

/// Sometimes you are given a lowercase [`Filename`] and you have to make do
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct FilenameLowercase(String);
pub struct FilenameLowercase(pub String);

impl FilenameLowercase {
#[must_use]
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use console::{style, Emoji};
use file::{get_files, name::ngrams};
use indicatif::ProgressBar;
use miette::{Diagnostic, Result};
use ngrams::MissingSubstringError;
use ngrams::CalculateError;
use rules::{
broken_wikilink::BrokenWikilinkVisitor, duplicate_alias::DuplicateAliasVisitor,
similar_filename::SimilarFilename, Report, ReportTrait, ThirdPassRule,
Expand Down Expand Up @@ -89,7 +89,7 @@ pub enum OutputErrors {
#[error(transparent)]
RegexError(#[from] regex::Error),
#[error(transparent)]
MissingSubstringError(#[from] MissingSubstringError),
CalculateError(#[from] CalculateError),
#[error(transparent)]
ParseError(#[from] ParseError),
#[error(transparent)]
Expand Down
20 changes: 14 additions & 6 deletions src/ngrams.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,23 @@ use std::{
path::PathBuf,
};

use regex::Regex;
use regex::{Error as RegexError, Regex};
use thiserror::Error;

#[derive(Error, Debug)]
#[error("{path} does not contain the ngram {ngram}")]
pub struct MissingSubstringError {
pub path: PathBuf,
pub ngram: String,
pub backtrace: std::backtrace::Backtrace,
pub enum CalculateError {
#[error("{path} does not contain the ngram {ngram}")]
MissingSubstringError {
path: PathBuf,
ngram: String,
backtrace: std::backtrace::Backtrace,
},
#[error("'{compilation_string}' regex did not compile: {source:?}")]
RegexCompilationError {
source: RegexError,
compilation_string: String,
backtrace: std::backtrace::Backtrace,
},
}

/// An ngram, " " separated, lowercase
Expand Down
26 changes: 6 additions & 20 deletions src/rules/duplicate_alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{
content::{front_matter::FrontMatterVisitor, wikilink::Alias},
name::{get_filename, Filename},
},
ngrams::MissingSubstringError,
ngrams::CalculateError,
sed::{ReplacePair, ReplacePairCompilationError},
visitor::{FinalizeError, VisitError, Visitor},
};
Expand Down Expand Up @@ -185,27 +185,13 @@ impl Visitor for DuplicateAliasVisitor {
#[derive(Error, Debug)]
pub enum NewDuplicateAliasError {
#[error(transparent)]
MissingSubstringError(#[from] MissingSubstringError),
CalculateError(#[from] CalculateError),
#[error(transparent)]
ReplacePairError(#[from] ReplacePairCompilationError),
#[error("The file {filename} contains its own alias {alias}")]
AliasAndFilenameSame { filename: Filename, alias: Alias },
}
//
// #[derive(Error, Debug)]
// pub enum CalculateError {
// #[error(transparent)]
// MissingSubstringError(#[from] MissingSubstringError),
// #[error(transparent)]
// ReplacePairError(#[from] ReplacePairCompilationError),
// #[error(transparent)]
// FileError(#[from] file::Error),
// #[error(transparent)]
// NewDuplicateAliasError(#[from] NewDuplicateAliasError),
// #[error(transparent)]
// IoError(#[from] std::io::Error),
// }
//

impl DuplicateAlias {
/// Create a new diagnostic
/// based on the two filenames and their similar ngrams
Expand Down Expand Up @@ -247,7 +233,7 @@ impl DuplicateAlias {
let file2_content_found = file2_content
.to_lowercase()
.find(&alias.to_string())
.ok_or_else(|| MissingSubstringError {
.ok_or_else(|| CalculateError::MissingSubstringError {
path: file2_path.to_path_buf(),
ngram: alias.to_string(),
backtrace: std::backtrace::Backtrace::capture(),
Expand Down Expand Up @@ -280,15 +266,15 @@ impl DuplicateAlias {
let file1_content_found = file1_content
.to_lowercase()
.find(&alias.to_string())
.ok_or_else(|| MissingSubstringError {
.ok_or_else(|| CalculateError::MissingSubstringError {
path: file1_path.to_path_buf(),
ngram: alias.to_string(),
backtrace: std::backtrace::Backtrace::capture(),
})?;
let file2_content_found = file2_content
.to_lowercase()
.find(&alias.to_string())
.ok_or_else(|| MissingSubstringError {
.ok_or_else(|| CalculateError::MissingSubstringError {
path: file2_path.to_path_buf(),
ngram: alias.to_string(),
backtrace: std::backtrace::Backtrace::capture(),
Expand Down
63 changes: 43 additions & 20 deletions src/rules/similar_filename.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
config::Config,
file::name::get_filename,
ngrams::{MissingSubstringError, Ngram},
ngrams::{CalculateError, Ngram},
rules::HasId,
};
use console::{style, Emoji};
Expand All @@ -11,6 +11,7 @@ use hashbrown::HashMap;
use indicatif::ProgressBar;
use miette::{Diagnostic, SourceOffset, SourceSpan};
use regex::Regex;
use std::backtrace::Backtrace;
use std::{
env,
path::{Path, PathBuf},
Expand Down Expand Up @@ -79,7 +80,7 @@ impl SimilarFilename {
file2_ngram: &Ngram,
spacing_regex: &Regex,
score: i64,
) -> Result<Self, MissingSubstringError> {
) -> Result<Self, CalculateError> {
// file paths as strings
let file1 = file1_path.to_string_lossy().to_lowercase();
let file2 = file2_path.to_string_lossy().to_lowercase();
Expand All @@ -92,15 +93,15 @@ impl SimilarFilename {
let find1 = spacing_regex
.replace_all(&file1, " ")
.find(&file1_ngram.to_string())
.ok_or_else(|| MissingSubstringError {
.ok_or_else(|| CalculateError::MissingSubstringError {
path: file1_path.to_path_buf(),
ngram: file1_ngram.to_string(),
backtrace: std::backtrace::Backtrace::capture(),
})?;
let find2 = spacing_regex
.replace_all(&file2, " ")
.find(&file2_ngram.to_string())
.ok_or_else(|| MissingSubstringError {
.ok_or_else(|| CalculateError::MissingSubstringError {
path: file2_path.to_path_buf(),
ngram: file2_ngram.to_string(),
backtrace: std::backtrace::Backtrace::capture(),
Expand Down Expand Up @@ -147,7 +148,7 @@ impl SimilarFilename {
filename_match_threshold: i64,
spacing_regex: &Regex,
config: &Config,
) -> Result<Vec<SimilarFilename>, MissingSubstringError> {
) -> Result<Vec<SimilarFilename>, CalculateError> {
// Convert all filenames to a single string
// Check if any two file ngrams fuzzy match
// TODO: Unfortunately this is O(n^2)
Expand All @@ -168,18 +169,20 @@ impl SimilarFilename {
let matcher = SkimMatcherV2::default();
let mut matches: Vec<SimilarFilename> = Vec::new();
for (ngram, filepath) in file_ngrams {
for (other_ngram, other_filepath) in file_ngrams {
'outer: for (other_ngram, other_filepath) in file_ngrams {
if ngram.nb_words() != other_ngram.nb_words() {
continue;
}

// TODO: This can be improved computationally using a hashmap
// Skip based on ignore_word_pairs
for (a, b) in &config.ignore_word_pairs {
println!("{a} || {b}");
if &ngram.to_string() == a && &other_ngram.to_string() == b {
continue;
continue 'outer;
}
if &ngram.to_string() == b && &other_ngram.to_string() == a {
continue;
continue 'outer;
}
}

Expand All @@ -193,7 +196,7 @@ impl SimilarFilename {
}

// Each editor will have its own special cases, let's centralize them
if SimilarFilename::skip_special_cases(filepath, other_filepath) {
if SimilarFilename::skip_special_cases(filepath, other_filepath, spacing_regex)? {
continue;
}

Expand Down Expand Up @@ -223,17 +226,37 @@ impl SimilarFilename {

/// Each editor will have its own special cases, let's centralize them
impl SimilarFilename {
/// Centralize the special cases for skipping
fn skip_special_cases(file1: &Path, file2: &Path) -> bool {
SimilarFilename::logseq_same_group(file1, file2)
}
pub fn skip_special_cases(
file1: &Path,
file2: &Path,
spacing_regex: &Regex,
) -> Result<bool, CalculateError> {
let file1_str = get_filename(file1).0;
let file2_str = get_filename(file2).0;

// If file1 is a prefix of file2 (with spacing), or file2 is a prefix of file1 (with spacing)
// TODO: Compiling regex inside a loop is expensive
let regex_str1 = format!("^{}({})", regex::escape(&file1_str), spacing_regex.as_str());
let file1_is_prefix =
Regex::new(&regex_str1).map_err(|e| CalculateError::RegexCompilationError {
source: e,
compilation_string: regex_str1,
backtrace: Backtrace::force_capture(),
})?;
let regex_str2 = format!("^{}({})", regex::escape(&file2_str), spacing_regex.as_str());
let file2_is_prefix =
Regex::new(&regex_str2).map_err(|e| CalculateError::RegexCompilationError {
source: e,
compilation_string: regex_str2,
backtrace: Backtrace::force_capture(),
})?;

/// Logseq has a special case if one startswith the other then
/// its probably a part of the same group
fn logseq_same_group(file1: &Path, file2: &Path) -> bool {
let file1 = get_filename(file1);
let file2 = get_filename(file2);
file1.to_string().starts_with(&file2.to_string())
|| file2.to_string().starts_with(&file1.to_string())
let out1 = file1_is_prefix.is_match(&file2_str);
let out2 = file2_is_prefix.is_match(&file1_str);
let out = out1 || out2;
println!(
"({file1_str:?}, {file2_str:?}, {spacing_regex:?}) => ({out1} || {out2}) => {out}"
);
Ok(out)
}
}
1 change: 1 addition & 0 deletions src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pub enum ParseError {
}

/// Parse the source code and visit all the nodes using tree-sitter
#[allow(clippy::result_large_err)]
pub fn parse(path: &PathBuf, visitors: Vec<Rc<RefCell<dyn Visitor>>>) -> Result<(), ParseError> {
debug!("Parsing file {:?}", path);
let source = std::fs::read_to_string(path).map_err(|source| ParseError::IoError {
Expand Down
22 changes: 11 additions & 11 deletions tests/logseq/broken_wikilink/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ lazy_static! {
#[test]
fn number_of_broken_wikilinks() {
info!("number_of_broken_wikilinks");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:?}");
}
Expand All @@ -28,7 +28,7 @@ fn number_of_broken_wikilinks() {
#[test]
fn lorem_exist_and_is_wikilink() {
info!("lorem_exist_and_is_wikilink");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:?}");
}
Expand All @@ -43,7 +43,7 @@ fn lorem_exist_and_is_wikilink() {
#[test]
fn ipsum_does_not_exist_and_is_wikilink() {
info!("ipsum_does_not_exist_and_is_wikilink");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:?}");
}
Expand All @@ -58,7 +58,7 @@ fn ipsum_does_not_exist_and_is_wikilink() {
#[test]
fn dolor_does_not_exist_and_is_not_wikilink_in_journal() {
info!("dolor_does_not_exist_and_is_not_wikilink_in_journal");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -74,7 +74,7 @@ fn dolor_does_not_exist_and_is_not_wikilink_in_journal() {
#[test]
fn sit_exists_and_is_tag() {
info!("sit_exists_and_is_tag");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -90,7 +90,7 @@ fn sit_exists_and_is_tag() {
#[test]
fn amet_does_not_exist_and_is_fancy_tag() {
info!("amet_does_not_exist_and_is_fancy_tag");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -106,7 +106,7 @@ fn amet_does_not_exist_and_is_fancy_tag() {
#[test]
fn consectetur_does_not_exist_and_is_tag() {
info!("consectetur_does_not_exist_and_is_tag");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -122,7 +122,7 @@ fn consectetur_does_not_exist_and_is_tag() {
#[test]
fn adipiscing_does_not_exist_and_is_tag() {
info!("adipiscing_does_not_exist_and_is_tag");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -138,7 +138,7 @@ fn adipiscing_does_not_exist_and_is_tag() {
#[test]
fn elit_exists_and_is_tag() {
info!("elit_exists_and_is_tag");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -152,7 +152,7 @@ fn elit_exists_and_is_tag() {
#[test]
fn dolor_does_not_exist_and_is_wikilink_in_foo() {
info!("dolor_does_not_exist_and_is_not_wikilink_in_foo");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand All @@ -166,7 +166,7 @@ fn dolor_does_not_exist_and_is_wikilink_in_foo() {
#[test]
fn dolor_does_not_exist_and_is_wikilink_in_foo_span() {
info!("dolor_does_not_exist_and_is_not_wikilink_in_foo");
let report = get_report(PATHS.as_slice());
let report = get_report(PATHS.as_slice(), None);
for broken_wikilink in &report.broken_wikilinks() {
debug!("{broken_wikilink:#?}");
}
Expand Down
Loading
Loading