Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: doc string parsing to avoid regex calls and remove the pcre2 dependency #1219

Merged
merged 1 commit into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion kclvm/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion kclvm/sema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ petgraph = "0.6.0"
anyhow = "1.0"
regex = "1.7.0"
lazy_static = "1.4.0"
pcre2 = "*"

kclvm-ast = { path = "../ast" }
kclvm-ast-pretty = { path = "../ast_pretty" }
Expand Down
95 changes: 35 additions & 60 deletions kclvm/sema/src/resolver/doc.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
use kclvm_ast::ast::SchemaStmt;
use pcre2::bytes::Regex;
use std::collections::{HashMap, HashSet};
use std::iter::Iterator;
use std::str;

lazy_static::lazy_static! {
static ref RE: Regex = Regex::new(r#"(?s)^(['\"]{3})(.*?)(['\"]{3})$"#).unwrap();
}

/// strip leading and trailing triple quotes from the original docstring content
fn strip_quotes(original: &mut String) {
let quote = original.chars().next().unwrap();
if quote != '"' && quote != '\'' {
return;
}
if let Ok(Some(mat)) = RE.find(original.as_bytes()) {
let content = str::from_utf8(&original.as_bytes()[mat.start() + 3..mat.end() - 3])
.unwrap()
.to_owned();
*original = content;
const SINGLE_QUOTES_STR: &str = "'''";
const DOUBLE_QUOTES_STR: &str = "\"\"\"";

/// Removes one matching pair of triple quotes (`'''` or `"""`) wrapped around a docstring.
///
/// The input is returned untouched unless it both starts and ends with the same
/// triple-quote delimiter, and the opening and closing delimiters do not overlap.
fn strip_quotes(original: &str) -> &str {
    [SINGLE_QUOTES_STR, DOUBLE_QUOTES_STR]
        .iter()
        // Take the first delimiter that wraps the text on both sides.
        .find_map(|quote| {
            original
                .strip_prefix(*quote)
                .and_then(|rest| rest.strip_suffix(*quote))
        })
        .unwrap_or(original)
}

Expand All @@ -27,7 +32,7 @@ fn expand_tabs(s: &str, spaces_per_tab: usize) -> String {
}

/// Clean up indentation by removing any common leading whitespace on all lines after the first line.
fn clean_doc(doc: &mut String) {
fn clean_doc(doc: &str) -> String {
let tab_expanded = expand_tabs(&doc, 4);
let mut lines: Vec<&str> = tab_expanded.split('\n').collect();
// Find minimum indentation of any non-blank lines after first line.
Expand Down Expand Up @@ -60,7 +65,7 @@ fn clean_doc(doc: &mut String) {
lines.remove(0);
}
}
*doc = lines.join("\n");
lines.join("\n")
}

/// A line-based string reader.
Expand Down Expand Up @@ -158,27 +163,6 @@ impl Reader {
}
}

/// Removes the leading and trailing blank lines from `doc`.
///
/// A line is blank when it is empty or contains only whitespace. Blank lines
/// between the first and last non-blank line are kept. If `doc` is empty or
/// every line is blank, an empty vector is returned.
fn _strip(doc: Vec<String>) -> Vec<String> {
    // No non-blank line at all: nothing survives the strip.
    let first = match doc.iter().position(|line| !line.trim().is_empty()) {
        Some(idx) => idx,
        None => return Vec::new(),
    };
    // A first non-blank line exists, so a last one does too; `first` is only a formal fallback.
    let last = doc
        .iter()
        .rposition(|line| !line.trim().is_empty())
        .unwrap_or(first);

    // The vector is owned, so move the kept lines out instead of cloning them.
    doc.into_iter().skip(first).take(last - first + 1).collect()
}

/// Checks if current line is at the beginning of a section
fn is_at_section(doc: &mut Reader) -> bool {
doc.seek_next_non_empty_line();
Expand All @@ -202,7 +186,7 @@ fn is_at_section(doc: &mut Reader) -> bool {
l2.starts_with(&"-".repeat(l1.len())) || l2.starts_with(&"=".repeat(l1.len()))
}

/// read lines before next section beginning, continuous empty lines will be merged to one
/// Reads lines before next section beginning, continuous empty lines will be merged to one
fn read_to_next_section(doc: &mut Reader) -> Vec<String> {
let mut section = doc.read_to_next_empty_line();

Expand All @@ -215,7 +199,7 @@ fn read_to_next_section(doc: &mut Reader) -> Vec<String> {
section
}

/// parse the Attribute Section of the docstring to list of Attribute
/// Parse the Attribute Section of the docstring to list of Attribute
fn parse_attr_list(content: String) -> Vec<Attribute> {
let mut r = Reader::new(content);
let mut attrs = vec![];
Expand All @@ -239,7 +223,7 @@ fn parse_attr_list(content: String) -> Vec<Attribute> {
attrs
}

/// parse the summary of the schema. The final summary content will be a concat of lines in the original summary with whitespace.
/// Parse the summary of the schema. The final summary content will be a concat of lines in the original summary with whitespace.
fn parse_summary(doc: &mut Reader) -> String {
if is_at_section(doc) {
// no summary provided
Expand All @@ -255,17 +239,14 @@ fn parse_summary(doc: &mut Reader) -> String {
.to_string()
}

/// parse the schema docstring to Doc.
/// Parse the schema docstring to Doc.
/// The summary of the schema content will be concatenated to a single line string by whitespaces.
/// The description of each attribute will be returned as separate lines.
pub fn parse_doc_string(ori: &String) -> Doc {
pub fn parse_doc_string(ori: &str) -> Doc {
if ori.is_empty() {
return Doc::new("".to_string(), vec![], HashMap::new());
}
let mut ori = ori.clone();
strip_quotes(&mut ori);
clean_doc(&mut ori);
let mut doc = Reader::new(ori);
let mut doc = Reader::new(clean_doc(strip_quotes(&ori)));
doc.reset();
let summary = parse_summary(&mut doc);

Expand Down Expand Up @@ -441,17 +422,13 @@ de",
];

for (ori, res) in oris.iter().zip(results.iter()) {
let from = &mut ori.to_string();
strip_quotes(from);
assert_eq!(from.to_string(), res.to_string());
assert_eq!(strip_quotes(ori).to_string(), res.to_string());
}
}

#[test]
fn test_clean_doc() {
let mut ori = read_doc_content();
strip_quotes(&mut ori);
clean_doc(&mut ori);
let ori = clean_doc(strip_quotes(&read_doc_content()));
let expect_cleaned = r#"Server is the common user interface for long-running
services adopting the best practice of Kubernetes.

Expand Down Expand Up @@ -566,14 +543,13 @@ unindented line

#[test]
fn test_at_section() {
let mut data = "Summary
let data = "Summary
Attribute
---------
description"
.to_string();

clean_doc(&mut data);

let data = clean_doc(&data);
let mut doc = Reader::new(data);
assert!(!is_at_section(&mut doc));

Expand All @@ -586,7 +562,7 @@ unindented line

#[test]
fn test_read_to_next_section() {
let mut data = "Summary
let data = "Summary


SummaryContinue
Expand All @@ -610,8 +586,7 @@ unindented line
--------
content"
.to_string();
clean_doc(&mut data);

let data = clean_doc(&data);
let mut doc = Reader::new(data);
assert_eq!(
read_to_next_section(&mut doc),
Expand Down
Loading