331 lines
9.0 KiB
Rust
331 lines
9.0 KiB
Rust
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at http://mozilla.org/MPL/2.0/.
//
// Copyright (c) 2024, Olof Kraigher olof.kraigher@gmail.com
|
|
|
|
use crate::syntax::{Comment, Value};
|
|
use crate::{kind_str, Token};
|
|
use std::cmp::max;
|
|
use std::iter;
|
|
|
|
/// The Buffer is the (mostly) mutable object used to write tokens to a string.
/// It operates mostly on tokens and is capable of indenting,
/// de-indenting and keeping the indentation level.
#[derive(Debug, Clone)]
pub struct Buffer {
    /// The text that has been written so far.
    inner: String,
    /// Insert an extra newline before pushing a token.
    /// This is relevant when there is a trailing comment.
    insert_extra_newline: bool,
    /// The current indentation level
    indentation: usize,
    /// The char used for indentation
    indent_char: char,
    /// The width used at each indentation level
    indent_width: usize,
}
|
|
|
|
impl Buffer {
    /// Creates an empty buffer at indentation level zero.
    ///
    /// Indentation is written as four space characters per level and no
    /// extra newline is pending.
    pub fn new() -> Buffer {
        let inner = String::new();
        Self {
            inner,
            indentation: 0,
            indent_width: 4,
            indent_char: ' ',
            insert_extra_newline: false,
        }
    }
}
|
|
|
|
impl Default for Buffer {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// Returns whether a leading comment is on the same line as the token, i.e.,
|
|
/// check the case
|
|
/// ```vhdl
|
|
/// /* some comment */ token
|
|
/// ```
|
|
fn leading_comment_is_on_token_line(comment: &Comment, token: &Token) -> bool {
|
|
if !comment.multi_line {
|
|
return false;
|
|
}
|
|
if comment.range.start.line != comment.range.end.line {
|
|
return false;
|
|
}
|
|
token.pos.start().line == comment.range.start.line
|
|
}
|
|
|
|
impl From<Buffer> for String {
|
|
fn from(value: Buffer) -> Self {
|
|
value.inner
|
|
}
|
|
}
|
|
|
|
impl Buffer {
|
|
pub fn as_str(&self) -> &str {
|
|
self.inner.as_str()
|
|
}
|
|
|
|
/// pushes a whitespace character to the buffer
|
|
pub fn push_whitespace(&mut self) {
|
|
if !self.insert_extra_newline {
|
|
self.push_ch(' ');
|
|
}
|
|
}
|
|
|
|
fn format_comment(&mut self, comment: &Comment) {
|
|
if !comment.multi_line {
|
|
self.push_str("--");
|
|
self.push_str(comment.value.trim_end())
|
|
} else {
|
|
self.push_str("/*");
|
|
self.push_str(&comment.value);
|
|
self.push_str("*/");
|
|
}
|
|
}
|
|
|
|
fn format_leading_comments(&mut self, comments: &[Comment]) {
|
|
for (i, comment) in comments.iter().enumerate() {
|
|
self.format_comment(comment);
|
|
if let Some(next_comment) = comments.get(i + 1) {
|
|
let number_of_line_breaks =
|
|
max(next_comment.range.start.line - comment.range.end.line, 1);
|
|
self.line_breaks(number_of_line_breaks);
|
|
} else {
|
|
self.line_break();
|
|
}
|
|
}
|
|
}
|
|
|
|
fn indent(&mut self) {
|
|
self.inner
|
|
.extend(iter::repeat(self.indent_char).take(self.indent_width * self.indentation));
|
|
}
|
|
|
|
/// Push a token to this buffer.
|
|
/// This takes care of all the leading and trailing comments attached to that token.
|
|
pub fn push_token(&mut self, token: &Token) {
|
|
if self.insert_extra_newline {
|
|
self.line_break();
|
|
}
|
|
self.insert_extra_newline = false;
|
|
if let Some(comments) = &token.comments {
|
|
// This is for example the case for situations like
|
|
// some_token /* comment in between */ some_other token
|
|
if comments.leading.len() == 1
|
|
&& leading_comment_is_on_token_line(&comments.leading[0], token)
|
|
{
|
|
self.format_comment(&comments.leading[0]);
|
|
self.push_ch(' ');
|
|
} else if !comments.leading.is_empty() {
|
|
self.format_leading_comments(comments.leading.as_slice());
|
|
}
|
|
}
|
|
match &token.value {
|
|
Value::Identifier(ident) => self.push_str(&ident.to_string()),
|
|
Value::String(string) => {
|
|
self.push_ch('"');
|
|
for byte in &string.bytes {
|
|
if *byte == b'"' {
|
|
self.push_ch('"');
|
|
self.push_ch('"');
|
|
} else {
|
|
self.push_ch(*byte as char);
|
|
}
|
|
}
|
|
self.push_ch('"');
|
|
}
|
|
Value::BitString(value, _) => self.push_str(&value.to_string()),
|
|
Value::AbstractLiteral(value, _) => self.push_str(&value.to_string()),
|
|
Value::Character(char) => {
|
|
self.push_ch('\'');
|
|
self.push_ch(*char as char);
|
|
self.push_ch('\'');
|
|
}
|
|
Value::Text(text) => self.push_str(&text.to_string()),
|
|
Value::None => self.push_str(kind_str(token.kind)),
|
|
}
|
|
if let Some(comments) = &token.comments {
|
|
if let Some(trailing_comment) = &comments.trailing {
|
|
self.push_ch(' ');
|
|
self.format_comment(trailing_comment);
|
|
self.insert_extra_newline = true
|
|
}
|
|
}
|
|
}
|
|
|
|
fn push_str(&mut self, value: &str) {
|
|
self.inner.push_str(value);
|
|
}
|
|
|
|
fn push_ch(&mut self, char: char) {
|
|
self.inner.push(char);
|
|
}
|
|
|
|
/// Increase the indentation level.
|
|
/// After this call, all new-line pushes will be preceded by an indentation,
|
|
/// specified via the `indent_char` and `indent_width` properties.
|
|
///
|
|
/// This call should always be matched with a `decrease_indent` call.
|
|
/// There is also the `indented` macro that combines the two calls.
|
|
pub fn increase_indent(&mut self) {
|
|
self.indentation += 1;
|
|
}
|
|
|
|
pub fn decrease_indent(&mut self) {
|
|
self.indentation -= 1;
|
|
}
|
|
|
|
/// Inserts a line break (i.e., newline) at the current position
|
|
pub fn line_break(&mut self) {
|
|
self.insert_extra_newline = false;
|
|
self.push_ch('\n');
|
|
self.indent();
|
|
}
|
|
|
|
/// Inserts multiple line breaks.
|
|
/// Note that this method must always be used (i.e., is different from
|
|
/// multiple `line_break` calls) as this method only indents the last line break
|
|
pub fn line_breaks(&mut self, count: u32) {
|
|
self.insert_extra_newline = false;
|
|
for _ in 0..count {
|
|
self.push_ch('\n');
|
|
}
|
|
self.indent();
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::analysis::tests::Code;
    use crate::formatting::buffer::Buffer;
    use std::iter::zip;

    /// Tokenizes `input` and checks that every token, pushed on its own
    /// into a fresh [`Buffer`], is rendered as the entry of `expected` at
    /// the same position. Tokens and expectations are paired with `zip`.
    fn check_token_formatted(input: &str, expected: &[&str]) {
        let code = Code::new(input);
        for (token, &want) in zip(code.tokenize(), expected) {
            let mut buffer = Buffer::new();
            buffer.push_token(&token);
            assert_eq!(buffer.as_str(), want);
        }
    }

    /// Keywords, identifiers and abstract literals are written as they are.
    #[test]
    fn format_simple_token() {
        check_token_formatted("entity", &["entity"]);
        check_token_formatted("foobar", &["foobar"]);
        check_token_formatted("1 23 4E5 4e5", &["1", "23", "4E5", "4e5"]);
    }

    /// The casing of identifiers is left untouched.
    #[test]
    fn preserves_identifier_casing() {
        check_token_formatted("FooBar foobar", &["FooBar", "foobar"]);
    }

    /// Character literals keep their quotes, including the quote character itself.
    #[test]
    fn character_formatting() {
        check_token_formatted("'a' 'Z' '''", &["'a'", "'Z'", "'''"]);
    }

    /// String literals keep their quotes; embedded quotes stay doubled.
    #[test]
    fn string_formatting() {
        check_token_formatted(
            r#""ABC" "" "DEF" """" "Hello "" ""#,
            &["\"ABC\"", "\"\"", "\"DEF\"", "\"\"\"\"", "\"Hello \"\" \""],
        );
    }

    /// Bit string literals are reproduced with their base and optional length.
    #[test]
    fn bit_string_formatting() {
        check_token_formatted(r#"B"10" 20B"8" X"2F""#, &["B\"10\"", "20B\"8\"", "X\"2F\""]);
    }

    /// A single-line comment above a token is written on its own line.
    #[test]
    fn leading_comment() {
        check_token_formatted(
            "\
-- I am a comment
foobar
",
            &["\
-- I am a comment
foobar"],
        );
    }

    /// Several single-line comments above a token each keep their own line.
    #[test]
    fn multiple_leading_comments() {
        check_token_formatted(
            "\
-- I am a comment
-- So am I
foobar
",
            &["\
-- I am a comment
-- So am I
foobar"],
        );
    }

    /// A comment after a token stays on the token's line.
    #[test]
    fn trailing_comments() {
        check_token_formatted(
            "\
foobar --After foobar comes foobaz
",
            &["foobar --After foobar comes foobaz"],
        );
    }

    /// A delimited comment spanning several lines is reproduced verbatim.
    #[test]
    fn single_multiline_comment() {
        check_token_formatted(
            "\
/** Some documentation.
* This is a token named 'entity'
*/
entity
",
            &["\
/** Some documentation.
* This is a token named 'entity'
*/
entity"],
        );
    }

    /// Delimited and single-line comments can be mixed above a token.
    #[test]
    fn multiline_comment_and_simple_comment() {
        check_token_formatted(
            "\
/* I am a multiline comment */
-- And I am a single line comment
entity
",
            &["\
/* I am a multiline comment */
-- And I am a single line comment
entity"],
        );
    }

    /// A token may carry a leading and a trailing comment at the same time.
    #[test]
    fn leading_comment_and_trailing_comment() {
        check_token_formatted(
            "\
-- Leading comment
entity -- Trailing comment
",
            &["\
-- Leading comment
entity -- Trailing comment"],
        );
    }
}
|