Skip to content

Commit

Permalink
Normalize '\r' in string literals to '\n'
Browse files Browse the repository at this point in the history
<!--
Thank you for contributing to Ruff! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

This PR normalizes line endings inside string literals to `\n`, as required by the printer.

<!-- What's the purpose of the change? What does it do, and why? -->

## Test Plan

I added a new test fixture that uses `\r\n` line endings and ran the ecosystem check. There are no remaining end-of-line panics.


https://gist.github.com/MichaReiser/8f36b1391ca7b48475b3a4f592d74ff4

<!-- How was it tested? -->
  • Loading branch information
MichaReiser committed Jun 30, 2023
1 parent dc65007 commit f9129e4
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 43 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[*.py]
end_of_line = crlf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto eol=crlf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
'This string will not include \
backslashes or newline characters.'

"""Multiline
String \"
"""
103 changes: 60 additions & 43 deletions crates/ruff_python_formatter/src/expression/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,11 @@ impl Format<PyFormatContext<'_>> for FormatStringPart {
let raw_content_range = relative_raw_content_range + self.part_range.start();

let raw_content = &string_content[relative_raw_content_range];
let (preferred_quotes, contains_newlines) =
preferred_quotes(raw_content, quotes, f.options().quote_style());
let preferred_quotes = preferred_quotes(raw_content, quotes, f.options().quote_style());

write!(f, [prefix, preferred_quotes])?;

let normalized = normalize_quotes(raw_content, preferred_quotes);
let (normalized, contains_newlines) = normalize_string(raw_content, preferred_quotes);

match normalized {
Cow::Borrowed(_) => {
Expand Down Expand Up @@ -294,9 +293,7 @@ fn preferred_quotes(
input: &str,
quotes: StringQuotes,
configured_style: QuoteStyle,
) -> (StringQuotes, ContainsNewlines) {
let mut contains_newlines = ContainsNewlines::No;

) -> StringQuotes {
let preferred_style = if quotes.triple {
// True if the string contains a triple quote sequence of the configured quote style.
let mut uses_triple_quotes = false;
Expand All @@ -305,7 +302,6 @@ fn preferred_quotes(
while let Some(c) = chars.next() {
let configured_quote_char = configured_style.as_char();
match c {
'\n' | '\r' => contains_newlines = ContainsNewlines::Yes,
'\\' => {
if matches!(chars.peek(), Some('"' | '\\')) {
chars.next();
Expand Down Expand Up @@ -358,10 +354,6 @@ fn preferred_quotes(
double_quotes += 1;
}

'\n' | '\r' => {
contains_newlines = ContainsNewlines::Yes;
}

_ => continue,
}
}
Expand All @@ -384,13 +376,10 @@ fn preferred_quotes(
}
};

(
StringQuotes {
triple: quotes.triple,
style: preferred_style,
},
contains_newlines,
)
StringQuotes {
triple: quotes.triple,
style: preferred_style,
}
}

#[derive(Copy, Clone, Debug)]
Expand Down Expand Up @@ -435,30 +424,56 @@ impl Format<PyFormatContext<'_>> for StringQuotes {

/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
/// with the provided `quotes`.
fn normalize_quotes(input: &str, quotes: StringQuotes) -> Cow<str> {
if quotes.triple {
Cow::Borrowed(input)
} else {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
// Tracks the last index of `input` that has been written to `output`.
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;

let style = quotes.style;
let preferred_quote = style.as_char();
let opposite_quote = style.invert().as_char();

let mut chars = input.char_indices();
///
/// Returns the normalized string and whether it contains newlines.
fn normalize_string(input: &str, quotes: StringQuotes) -> (Cow<str>, ContainsNewlines) {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
// Tracks the last index of `input` that has been written to `output`.
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;

let mut newlines = ContainsNewlines::No;

let style = quotes.style;
let preferred_quote = style.as_char();
let opposite_quote = style.invert().as_char();

let mut chars = input.char_indices();

while let Some((index, c)) = chars.next() {
if c == '\r' {
output.push_str(&input[last_index..index]);

// Skip over the '\r' character, keep the `\n`
if input.as_bytes().get(index + 1).copied() == Some(b'\n') {
chars.next();
}
// Replace the `\r` with a `\n`
else {
output.push('\n');
}

while let Some((index, c)) = chars.next() {
last_index = index + '\r'.len_utf8();
newlines = ContainsNewlines::Yes;
} else if c == '\n' {
newlines = ContainsNewlines::Yes;
} else if !quotes.triple {
if c == '\\' {
if let Some((_, next)) = chars.next() {
if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote {
// Remove the escape by ending before the backslash and starting again with the quote
chars.next();
output.push_str(&input[last_index..index]);
last_index = index + '\\'.len_utf8();
} else if next == preferred_quote {
// Quote is already escaped, skip over it.
chars.next();
} else if next == '\\' {
// Skip over escaped backslashes
chars.next();
}
}
} else if c == preferred_quote {
Expand All @@ -469,12 +484,14 @@ fn normalize_quotes(input: &str, quotes: StringQuotes) -> Cow<str> {
last_index = index + preferred_quote.len_utf8();
}
}

if last_index == 0 {
Cow::Borrowed(input)
} else {
output.push_str(&input[last_index..]);
Cow::Owned(output)
}
}

let normalized = if last_index == 0 {
Cow::Borrowed(input)
} else {
output.push_str(&input[last_index..]);
Cow::Owned(output)
};

(normalized, newlines)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
source: crates/ruff_python_formatter/tests/fixtures.rs
input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/carriage_return/string.py
---
## Input
```py
'This string will not include \
backslashes or newline characters.'
"""Multiline
String \"
"""
```

## Output
```py
"This string will not include \
backslashes or newline characters."
"""Multiline
String \"
"""
```



0 comments on commit f9129e4

Please sign in to comment.