Skip to content

Commit

Permalink
simplify injection_for_match
Browse files Browse the repository at this point in the history
Split out injection pair logic into its own method to make the overall
flow easier to follow.

Also transform the top-level function into a method on a
HighlightConfiguration.
  • Loading branch information
nrdxp committed Oct 5, 2022
1 parent 79f70cf commit 4947c74
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 106 deletions.
208 changes: 104 additions & 104 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,12 +538,14 @@ impl Loader {

pub fn language_configuration_for_injection_string(
&self,
capture: &InjectionCapture,
capture: &InjectionLanguageMarker,
) -> Option<Arc<LanguageConfiguration>> {
match capture {
InjectionCapture::Name(string) => self.language_config_for_name(string),
InjectionCapture::File(file) => self.language_config_for_file_name(file),
InjectionCapture::Shebang(shebang) => self.language_config_for_language_id(shebang),
InjectionLanguageMarker::Name(string) => self.language_config_for_name(string),
InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file),
InjectionLanguageMarker::Shebang(shebang) => {
self.language_config_for_language_id(shebang)
}
}
}

Expand Down Expand Up @@ -635,7 +637,7 @@ impl Syntax {
queue.push_back(self.root);

let scopes = self.loader.scopes.load();
let injection_callback = |language: &InjectionCapture| {
let injection_callback = |language: &InjectionLanguageMarker| {
self.loader
.language_configuration_for_injection_string(language)
.and_then(|language_config| language_config.highlight_config(&scopes))
Expand Down Expand Up @@ -769,12 +771,9 @@ impl Syntax {
);
let mut injections = Vec::new();
for mat in matches {
let (injection_capture, content_node, included_children) = injection_for_match(
&layer.config,
&layer.config.injections_query,
&mat,
source_slice,
);
let (injection_capture, content_node, included_children) = layer
.config
.injection_for_match(&layer.config.injections_query, &mat, source_slice);

// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures.
Expand Down Expand Up @@ -810,13 +809,9 @@ impl Syntax {
);
for mat in matches {
let entry = &mut injections_by_pattern_index[mat.pattern_index];
let (injection_capture, content_node, included_children) =
injection_for_match(
&layer.config,
combined_injections_query,
&mat,
source_slice,
);
let (injection_capture, content_node, included_children) = layer
.config
.injection_for_match(combined_injections_query, &mat, source_slice);
if injection_capture.is_some() {
entry.0 = injection_capture;
}
Expand Down Expand Up @@ -1415,6 +1410,94 @@ impl HighlightConfiguration {

self.highlight_indices.store(Arc::new(indices));
}

fn injection_pair<'a>(
&self,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
let mut injection_capture = None;
let mut content_node = None;

for capture in query_match.captures {
let index = Some(capture.index);
if index == self.injection_language_capture_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
injection_capture = Some(InjectionLanguageMarker::Name(name));
} else if index == self.injection_filename_capture_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
} else if index == self.injection_shebang_capture_index {
let node_slice = source.slice(capture.node.byte_range());

// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
let lines = if node_slice.len_lines() > 1 {
node_slice.slice({
let start = node_slice.line_to_byte(0);
let end = node_slice.line_to_byte(2) - 1;
start..end
})
} else {
node_slice.line(0)
};

static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());

injection_capture = SHEBANG_REGEX
.captures(lines.as_str().unwrap_or_default())
.map(|cap| InjectionLanguageMarker::Shebang(cap[1].to_owned()))
} else if index == self.injection_content_capture_index {
content_node = Some(capture.node);
}
}
(injection_capture, content_node)
}

fn injection_for_match<'a>(
&self,
query: &'a Query,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
) -> (
Option<InjectionLanguageMarker<'a>>,
Option<Node<'a>>,
IncludedChildren,
) {
let (mut injection_capture, content_node) = self.injection_pair(query_match, source);

let mut included_children = IncludedChildren::default();
for prop in query.property_settings(query_match.pattern_index) {
match prop.key.as_ref() {
// In addition to specifying the language name via the text of a
// captured node, it can also be hard-coded via a `#set!` predicate
// that sets the injection.language key.
"injection.language" => {
injection_capture = prop
.value
.as_ref()
.map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
}

// By default, injections do not include the *children* of an
// `injection.content` node - only the ranges that belong to the
// node itself. This can be changed using a `#set!` predicate that
// sets the `injection.include-children` key.
"injection.include-children" => included_children = IncludedChildren::All,

// Some queries might only exclude named children but include unnamed
// children in their `injection.content` node. This can be enabled using
// a `#set!` predicate that sets the `injection.include-unnamed-children` key.
"injection.include-unnamed-children" => {
included_children = IncludedChildren::Unnamed
}
_ => {}
}
}

(injection_capture, content_node, included_children)
}
}

impl<'a> HighlightIterLayer<'a> {
Expand Down Expand Up @@ -1825,96 +1908,13 @@ impl<'a> Iterator for HighlightIter<'a> {
}

#[derive(Debug, Clone)]
pub enum InjectionCapture<'a> {
pub enum InjectionLanguageMarker<'a> {
Name(Cow<'a, str>),
File(Cow<'a, Path>),
Filename(Cow<'a, Path>),
Shebang(String),
}

static SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";

fn injection_for_match<'a>(
config: &HighlightConfiguration,
query: &'a Query,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
) -> (
Option<InjectionCapture<'a>>,
Option<Node<'a>>,
IncludedChildren,
) {
let content_capture_index = config.injection_content_capture_index;
let language_capture_index = config.injection_language_capture_index;
let language_filename_index = config.injection_filename_capture_index;
let language_shebang_index = config.injection_shebang_capture_index;

let mut injection_capture = None;
let mut content_node = None;
for capture in query_match.captures {
let index = Some(capture.index);
if index == language_capture_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
injection_capture = Some(InjectionCapture::Name(name));
} else if index == content_capture_index {
content_node = Some(capture.node);
} else if index == language_filename_index {
let name = byte_range_to_str(capture.node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();

injection_capture = Some(InjectionCapture::File(path.into()));
} else if index == language_shebang_index {
let node_slice = source.slice(capture.node.byte_range());

// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
let lines = if node_slice.len_lines() > 1 {
node_slice.slice(
// start of first line
node_slice.line_to_byte(0)
// end of second line
..node_slice.line_to_byte(2) - 1,
)
} else {
node_slice.line(0)
};

static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());

injection_capture = SHEBANG_REGEX
.captures(lines.as_str().unwrap_or_default())
.map(|cap| InjectionCapture::Shebang(cap[1].to_owned()));
}
}

let mut included_children = IncludedChildren::default();
for prop in query.property_settings(query_match.pattern_index) {
match prop.key.as_ref() {
// In addition to specifying the language name via the text of a
// captured node, it can also be hard-coded via a `#set!` predicate
// that sets the injection.language key.
"injection.language" => {
injection_capture = prop
.value
.as_ref()
.map(|s| InjectionCapture::Name(s.as_ref().into()));
}

// By default, injections do not include the *children* of an
// `injection.content` node - only the ranges that belong to the
// node itself. This can be changed using a `#set!` predicate that
// sets the `injection.include-children` key.
"injection.include-children" => included_children = IncludedChildren::All,

// Some queries might only exclude named children but include unnamed
// children in their `injection.content` node. This can be enabled using
// a `#set!` predicate that sets the `injection.include-unnamed-children` key.
"injection.include-unnamed-children" => included_children = IncludedChildren::Unnamed,
_ => {}
}
}

(injection_capture, content_node, included_children)
}
const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";

pub struct Merge<I> {
iter: I,
Expand Down
6 changes: 4 additions & 2 deletions helix-term/src/ui/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::sync::Arc;
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag};

use helix_core::{
syntax::{self, HighlightEvent, InjectionCapture, Syntax},
syntax::{self, HighlightEvent, InjectionLanguageMarker, Syntax},
Rope,
};
use helix_view::{
Expand Down Expand Up @@ -47,7 +47,9 @@ pub fn highlighted_code_block<'a>(

let rope = Rope::from(text.as_ref());
let syntax = config_loader
.language_configuration_for_injection_string(&InjectionCapture::Name(language.into()))
.language_configuration_for_injection_string(&InjectionLanguageMarker::Name(
language.into(),
))
.and_then(|config| config.highlight_config(theme.scopes()))
.map(|config| Syntax::new(&rope, config, Arc::clone(&config_loader)));

Expand Down

0 comments on commit 4947c74

Please sign in to comment.