Skip to content

Commit

Permalink
revamped snippet text element parsing
Browse files Browse the repository at this point in the history
Snippet text elements can contain escape sequences
that must be treated properly. Furthermore snippets
must always escape certain characters (like `}`
or `\`). The function has been updated to account
for that. `text` is now also included with
`anything` to match the grammar and can also
match empty text. To avoid infinite loops the
`non_empty` combinator has been added which is
automatically used in the `one_or_more` and
`zero_or_more` combinators where the problem would
occur.
  • Loading branch information
pascalkuthe authored and archseer committed Mar 16, 2023
1 parent bbf4800 commit 90348b8
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 45 deletions.
117 changes: 72 additions & 45 deletions helix-lsp/src/snippet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ pub enum CaseChange {

#[derive(Debug, PartialEq, Eq)]
pub enum FormatItem<'a> {
Text(&'a str),
Text(Tendril),
Capture(usize),
CaseChange(usize, CaseChange),
Conditional(usize, Option<&'a str>, Option<&'a str>),
}

#[derive(Debug, PartialEq, Eq)]
pub struct Regex<'a> {
value: &'a str,
value: Tendril,
replacement: Vec<FormatItem<'a>>,
options: Option<&'a str>,
options: Tendril,
}

#[derive(Debug, PartialEq, Eq)]
Expand All @@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
},
Choice {
tabstop: usize,
choices: Vec<&'a str>,
choices: Vec<Tendril>,
},
Variable {
name: &'a str,
default: Option<&'a str>,
regex: Option<Regex<'a>>,
},
Text(&'a str),
Text(Tendril),
}

#[derive(Debug, PartialEq, Eq)]
Expand All @@ -67,12 +67,12 @@ fn render_elements(

for element in snippet_elements {
match element {
&Text(text) => {
Text(text) => {
// small optimization to avoid calling replace when it's unnecessary
let text = if text.contains('\n') {
Cow::Owned(text.replace('\n', newline_with_offset))
} else {
Cow::Borrowed(text)
Cow::Borrowed(text.as_str())
};
*offset += text.chars().count();
insert.push_str(&text);
Expand Down Expand Up @@ -160,6 +160,7 @@ pub fn render(
}

mod parser {
use helix_core::Tendril;
use helix_parsec::*;

use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
Expand Down Expand Up @@ -210,8 +211,32 @@ mod parser {
}
}

fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
take_while(move |c| cs.into_iter().all(|c1| c != c1))
// Character sets handed to `text`: an unescaped occurrence of any of these
// ends the text run, and a backslash followed by one of them yields that
// character literally.

// Used for top-level snippet text and for the body of a placeholder.
const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
// Used for the literal text parts of a regex replacement; adds `/`.
const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
// Used for the entries of a choice list; adds `|` and `,`.
const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];

/// Parses a run of literal text, resolving backslash escapes on the way.
///
/// Characters are copied into the output until one of `escape_chars` is seen
/// unescaped, at which point parsing stops in front of that character.
/// A backslash followed by a member of `escape_chars` contributes just that
/// member to the output. A backslash followed by anything else (or by the end
/// of the input) also stops parsing, leaving the backslash in the remaining
/// input.
///
/// The returned parser always succeeds and may produce an empty text without
/// consuming any input.
fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
    move |input: &'a str| {
        let mut unescaped = Tendril::new();
        let mut iter = input.char_indices();

        while let Some((pos, ch)) = iter.next() {
            if ch == '\\' {
                // A backslash is only meaningful in front of an escapable
                // character; the character after it is consumed either way.
                match iter.next() {
                    Some((_, escaped)) if escape_chars.contains(&escaped) => {
                        unescaped.push(escaped);
                    }
                    _ => return Ok((&input[pos..], unescaped)),
                }
            } else if escape_chars.contains(&ch) {
                // Unescaped terminator: hand it back to the caller untouched.
                return Ok((&input[pos..], unescaped));
            } else {
                unescaped.push(ch);
            }
        }

        // Ran off the end of the input without meeting a terminator.
        Ok(("", unescaped))
    }
}

fn digit<'a>() -> impl Parser<'a, Output = usize> {
Expand Down Expand Up @@ -274,20 +299,18 @@ mod parser {
}

fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
let text = map(text(['$', '/']), FormatItem::Text);
let replacement = reparse_as(
take_until(|c| c == '/'),
one_or_more(choice!(format(), text)),
);

map(
seq!(
"/",
take_until(|c| c == '/'),
// TODO parse as ECMAScript and convert to rust regex
non_empty(text(&['/', '\\'])),
"/",
replacement,
one_or_more(choice!(
format(),
map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
)),
"/",
optional(take_until(|c| c == '}')),
text(&['}', '\\',]),
),
|(_, value, _, replacement, _, options)| Regex {
value,
Expand All @@ -308,13 +331,12 @@ mod parser {
}

fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
let text = map(text(['$', '}']), SnippetElement::Text);
map(
seq!(
"${",
digit(),
":",
one_or_more(choice!(anything(), text)),
one_or_more(anything(TEXT_ESCAPE_CHARS)),
"}"
),
|seq| SnippetElement::Placeholder {
Expand All @@ -330,7 +352,7 @@ mod parser {
"${",
digit(),
"|",
sep(take_until(|c| c == ',' || c == '|'), ","),
sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
"|}",
),
|seq| SnippetElement::Choice {
Expand Down Expand Up @@ -368,17 +390,21 @@ mod parser {
)
}

fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
// The parser has to be constructed lazily to avoid infinite opaque type recursion
|input: &'a str| {
let parser = choice!(tabstop(), placeholder(), choice(), variable());
// Parses a single snippet element: a tabstop, placeholder, choice, variable,
// or a run of plain text delimited by `escape_chars`.
//
// The plain-text alternative is listed last. It can succeed without consuming
// any input, so repeating this parser relies on the repetition combinator
// rejecting empty matches.
fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
// The parser has to be constructed lazily (inside the closure, on each call)
// to avoid infinite opaque type recursion: `placeholder()` itself is built
// from `anything`.
move |input: &'a str| {
let parser = choice!(
tabstop(),
placeholder(),
choice(),
variable(),
map(text(escape_chars), SnippetElement::Text)
);
parser.parse(input)
}
}

fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
let text = map(text(['$']), SnippetElement::Text);
map(one_or_more(choice!(anything(), text)), |parts| Snippet {
map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
elements: parts,
})
}
Expand All @@ -392,6 +418,7 @@ mod parser {
}
})
}

#[cfg(test)]
mod test {
use super::SnippetElement::*;
Expand All @@ -407,12 +434,12 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("match("),
Text("match(".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("Arg1")),
value: vec!(Text("Arg1".into())),
},
Text(")")
Text(")".into())
]
}),
parse("match(${1:Arg1})")
Expand Down Expand Up @@ -446,15 +473,15 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("local "),
Text("local ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("var")),
value: vec!(Text("var".into())),
},
Text(" = "),
Text(" = ".into()),
Placeholder {
tabstop: 1,
value: vec!(Text("value")),
value: vec!(Text("value".into())),
},
]
}),
Expand All @@ -468,7 +495,7 @@ mod parser {
Ok(Snippet {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
},]
}),
parse("${1:var, $2}")
Expand All @@ -482,10 +509,10 @@ mod parser {
elements: vec![Placeholder {
tabstop: 1,
value: vec!(
Text("foo "),
Text("foo ".into()),
Placeholder {
tabstop: 2,
value: vec!(Text("bar")),
value: vec!(Text("bar".into())),
},
),
},]
Expand All @@ -499,27 +526,27 @@ mod parser {
assert_eq!(
Ok(Snippet {
elements: vec![
Text("hello "),
Text("hello ".into()),
Tabstop { tabstop: 1 },
Tabstop { tabstop: 2 },
Text(" "),
Text(" ".into()),
Choice {
tabstop: 1,
choices: vec!["one", "two", "three"]
choices: vec!["one".into(), "two".into(), "three".into()]
},
Text(" "),
Text(" ".into()),
Variable {
name: "name",
default: Some("foo"),
regex: None
},
Text(" "),
Text(" ".into()),
Variable {
name: "var",
default: None,
regex: None
},
Text(" "),
Text(" ".into()),
Variable {
name: "TM",
default: None,
Expand All @@ -539,9 +566,9 @@ mod parser {
name: "TM_FILENAME",
default: None,
regex: Some(Regex {
value: "(.*).+$",
value: "(.*).+$".into(),
replacement: vec![FormatItem::Capture(1)],
options: None,
options: Tendril::new(),
}),
}]
}),
Expand Down
13 changes: 13 additions & 0 deletions helix-parsec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();

Expand Down Expand Up @@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
where
P: Parser<'a, Output = T>,
{
let parser = non_empty(parser);
move |mut input| {
let mut values = Vec::new();

Expand Down Expand Up @@ -559,3 +561,14 @@ where
Ok((input, values))
}
}

/// Wraps `p` so that a successful parse which consumed nothing is reported as
/// a failure.
///
/// Whether input was consumed is decided by comparing the byte length of the
/// remaining input with that of the original input. Failures of `p` and
/// successes that did consume input are passed through unchanged. On a
/// rejected empty match the error carries the original, untouched input.
pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
    move |input| match p.parse(input) {
        // Succeeded, but the remaining input is as long as what we started with.
        Ok((rest, _)) if rest.len() == input.len() => Err(input),
        outcome => outcome,
    }
}

0 comments on commit 90348b8

Please sign in to comment.