2024-10-04 23:29:34 +08:00

331 lines
9.0 KiB
Rust

// This Source Code Form is subject to the terms of the Mozilla Public
// Lic// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// This Source Code Form is subject to the terms of the Mozilla Public
// You can obtain one at http://mozilla.org/MPL/2.0/.
//
// Copyright (c) 2024, Olof Kraigher olof.kraigher@gmail.com
use crate::syntax::{Comment, Value};
use crate::{kind_str, Token};
use std::cmp::max;
use std::iter;
/// The Buffer is the (mostly) mutable object used to write tokens to a string.
/// It operates mostly on tokens and is capable of indenting,
/// de-indenting and keeping the indentation level.
pub struct Buffer {
inner: String,
/// insert an extra newline before pushing a token.
/// This is relevant when there is a trailing comment
insert_extra_newline: bool,
/// The current indentation level
indentation: usize,
/// The char used for indentation
indent_char: char,
/// The width used at each indentation level
indent_width: usize,
}
impl Buffer {
pub fn new() -> Buffer {
Buffer {
inner: String::new(),
insert_extra_newline: false,
indentation: 0,
indent_char: ' ',
indent_width: 4,
}
}
}
impl Default for Buffer {
fn default() -> Self {
Self::new()
}
}
/// Returns whether a leading comment is on the same line as the token, i.e.,
/// check the case
/// ```vhdl
/// /* some comment */ token
/// ```
fn leading_comment_is_on_token_line(comment: &Comment, token: &Token) -> bool {
if !comment.multi_line {
return false;
}
if comment.range.start.line != comment.range.end.line {
return false;
}
token.pos.start().line == comment.range.start.line
}
impl From<Buffer> for String {
fn from(value: Buffer) -> Self {
value.inner
}
}
impl Buffer {
pub fn as_str(&self) -> &str {
self.inner.as_str()
}
/// pushes a whitespace character to the buffer
pub fn push_whitespace(&mut self) {
if !self.insert_extra_newline {
self.push_ch(' ');
}
}
fn format_comment(&mut self, comment: &Comment) {
if !comment.multi_line {
self.push_str("--");
self.push_str(comment.value.trim_end())
} else {
self.push_str("/*");
self.push_str(&comment.value);
self.push_str("*/");
}
}
fn format_leading_comments(&mut self, comments: &[Comment]) {
for (i, comment) in comments.iter().enumerate() {
self.format_comment(comment);
if let Some(next_comment) = comments.get(i + 1) {
let number_of_line_breaks =
max(next_comment.range.start.line - comment.range.end.line, 1);
self.line_breaks(number_of_line_breaks);
} else {
self.line_break();
}
}
}
fn indent(&mut self) {
self.inner
.extend(iter::repeat(self.indent_char).take(self.indent_width * self.indentation));
}
/// Push a token to this buffer.
/// This takes care of all the leading and trailing comments attached to that token.
pub fn push_token(&mut self, token: &Token) {
if self.insert_extra_newline {
self.line_break();
}
self.insert_extra_newline = false;
if let Some(comments) = &token.comments {
// This is for example the case for situations like
// some_token /* comment in between */ some_other token
if comments.leading.len() == 1
&& leading_comment_is_on_token_line(&comments.leading[0], token)
{
self.format_comment(&comments.leading[0]);
self.push_ch(' ');
} else if !comments.leading.is_empty() {
self.format_leading_comments(comments.leading.as_slice());
}
}
match &token.value {
Value::Identifier(ident) => self.push_str(&ident.to_string()),
Value::String(string) => {
self.push_ch('"');
for byte in &string.bytes {
if *byte == b'"' {
self.push_ch('"');
self.push_ch('"');
} else {
self.push_ch(*byte as char);
}
}
self.push_ch('"');
}
Value::BitString(value, _) => self.push_str(&value.to_string()),
Value::AbstractLiteral(value, _) => self.push_str(&value.to_string()),
Value::Character(char) => {
self.push_ch('\'');
self.push_ch(*char as char);
self.push_ch('\'');
}
Value::Text(text) => self.push_str(&text.to_string()),
Value::None => self.push_str(kind_str(token.kind)),
}
if let Some(comments) = &token.comments {
if let Some(trailing_comment) = &comments.trailing {
self.push_ch(' ');
self.format_comment(trailing_comment);
self.insert_extra_newline = true
}
}
}
fn push_str(&mut self, value: &str) {
self.inner.push_str(value);
}
fn push_ch(&mut self, char: char) {
self.inner.push(char);
}
/// Increase the indentation level.
/// After this call, all new-line pushes will be preceded by an indentation,
/// specified via the `indent_char` and `indent_width` properties.
///
/// This call should always be matched with a `decrease_indent` call.
/// There is also the `indented` macro that combines the two calls.
pub fn increase_indent(&mut self) {
self.indentation += 1;
}
pub fn decrease_indent(&mut self) {
self.indentation -= 1;
}
/// Inserts a line break (i.e., newline) at the current position
pub fn line_break(&mut self) {
self.insert_extra_newline = false;
self.push_ch('\n');
self.indent();
}
/// Inserts multiple line breaks.
/// Note that this method must always be used (i.e., is different from
/// multiple `line_break` calls) as this method only indents the last line break
pub fn line_breaks(&mut self, count: u32) {
self.insert_extra_newline = false;
for _ in 0..count {
self.push_ch('\n');
}
self.indent();
}
}
#[cfg(test)]
mod tests {
use crate::analysis::tests::Code;
use crate::formatting::buffer::Buffer;
use std::iter::zip;
fn check_token_formatted(input: &str, expected: &[&str]) {
let code = Code::new(input);
let tokens = code.tokenize();
for (token, expected) in zip(tokens, expected) {
let mut buffer = Buffer::new();
buffer.push_token(&token);
assert_eq!(buffer.as_str(), *expected);
}
}
#[test]
fn format_simple_token() {
check_token_formatted("entity", &["entity"]);
check_token_formatted("foobar", &["foobar"]);
check_token_formatted("1 23 4E5 4e5", &["1", "23", "4E5", "4e5"]);
}
#[test]
fn preserves_identifier_casing() {
check_token_formatted("FooBar foobar", &["FooBar", "foobar"]);
}
#[test]
fn character_formatting() {
check_token_formatted("'a' 'Z' '''", &["'a'", "'Z'", "'''"]);
}
#[test]
fn string_formatting() {
check_token_formatted(
r#""ABC" "" "DEF" """" "Hello "" ""#,
&["\"ABC\"", "\"\"", "\"DEF\"", "\"\"\"\"", "\"Hello \"\" \""],
);
}
#[test]
fn bit_string_formatting() {
check_token_formatted(r#"B"10" 20B"8" X"2F""#, &["B\"10\"", "20B\"8\"", "X\"2F\""]);
}
#[test]
fn leading_comment() {
check_token_formatted(
"\
-- I am a comment
foobar
",
&["\
-- I am a comment
foobar"],
);
}
#[test]
fn multiple_leading_comments() {
check_token_formatted(
"\
-- I am a comment
-- So am I
foobar
",
&["\
-- I am a comment
-- So am I
foobar"],
);
}
#[test]
fn trailing_comments() {
check_token_formatted(
"\
foobar --After foobar comes foobaz
",
&["foobar --After foobar comes foobaz"],
);
}
#[test]
fn single_multiline_comment() {
check_token_formatted(
"\
/** Some documentation.
* This is a token named 'entity'
*/
entity
",
&["\
/** Some documentation.
* This is a token named 'entity'
*/
entity"],
);
}
#[test]
fn multiline_comment_and_simple_comment() {
check_token_formatted(
"\
/* I am a multiline comment */
-- And I am a single line comment
entity
",
&["\
/* I am a multiline comment */
-- And I am a single line comment
entity"],
);
}
#[test]
fn leading_comment_and_trailing_comment() {
check_token_formatted(
"\
-- Leading comment
entity -- Trailing comment
",
&["\
-- Leading comment
entity -- Trailing comment"],
);
}
}