534 lines
18 KiB
Rust
534 lines
18 KiB
Rust
use {
|
|
crate::toml::{
|
|
toml_lexer::{Lexer, Token},
|
|
toml_span::{Span, Spanned, SpannedExt},
|
|
toml_value::Value,
|
|
},
|
|
bstr::ByteSlice,
|
|
indexmap::{
|
|
IndexMap,
|
|
map::{RawEntryApiV1, raw_entry_v1::RawEntryMut},
|
|
},
|
|
std::{collections::VecDeque, mem, str::FromStr},
|
|
thiserror::Error,
|
|
};
|
|
|
|
pub trait ErrorHandler {
|
|
fn handle(&self, err: Spanned<ParserError>);
|
|
fn redefinition(&self, err: Spanned<ParserError>, prev: Span);
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum ParserError {
|
|
#[error("Unexpected end of file")]
|
|
UnexpectedEof,
|
|
#[error("Expected a key")]
|
|
MissingKey,
|
|
#[error("Expected {0} but found {1}")]
|
|
Expected(&'static str, &'static str),
|
|
#[error("Duplicate key overwrites the previous definition")]
|
|
Redefined,
|
|
#[error("Literal is not valid UTF-8")]
|
|
NonUtf8Literal,
|
|
#[error("Could not parse the literal")]
|
|
UnknownLiteral,
|
|
#[error("Ignoring key due to following error")]
|
|
IgnoringKey,
|
|
#[error("Unnecessary comma")]
|
|
UnnecessaryComma,
|
|
}
|
|
|
|
pub fn parse(
|
|
input: &[u8],
|
|
error_handler: &dyn ErrorHandler,
|
|
) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let parser = Parser {
|
|
lexer: Lexer::new(input),
|
|
error_handler,
|
|
last_span: None,
|
|
};
|
|
parser.parse()
|
|
}
|
|
|
|
struct Parser<'a, 'b> {
|
|
lexer: Lexer<'a>,
|
|
error_handler: &'b dyn ErrorHandler,
|
|
last_span: Option<Span>,
|
|
}
|
|
|
|
/// A (possibly dotted) key: one spanned string per key part, outermost first.
type Key = VecDeque<Spanned<String>>;
|
|
|
|
impl<'a> Parser<'a, '_> {
|
|
/// Consumes the parser and parses the whole input as a document.
fn parse(mut self) -> Result<Spanned<Value>, Spanned<ParserError>> {
    let document = self.parse_document();
    document
}
|
|
|
|
fn unexpected_eof(&self) -> Spanned<ParserError> {
|
|
let span = self.last_span.unwrap_or(Span { lo: 0, hi: 0 });
|
|
ParserError::UnexpectedEof.spanned(span)
|
|
}
|
|
|
|
/// Consumes the next token and remembers its span in `last_span`.
///
/// Returns an `UnexpectedEof` error if the lexer has no more tokens.
fn next(&mut self, value_context: bool) -> Result<Spanned<Token<'a>>, Spanned<ParserError>> {
    let Some(token) = self.lexer.next(value_context) else {
        return Err(self.unexpected_eof());
    };
    self.last_span = Some(token.span);
    Ok(token)
}
|
|
|
|
/// Returns the next token without consuming it.
///
/// Returns an `UnexpectedEof` error if the lexer has no more tokens.
fn peek(&mut self, value_context: bool) -> Result<Spanned<Token<'a>>, Spanned<ParserError>> {
    let Some(token) = self.lexer.peek(value_context) else {
        return Err(self.unexpected_eof());
    };
    Ok(token)
}
|
|
|
|
fn parse_value(&mut self) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let token = self.peek(true)?;
|
|
match token.value {
|
|
Token::LiteralString(s) => self.parse_literal_string(s),
|
|
Token::CookedString(s) => self.parse_cooked_string(s),
|
|
Token::LeftBracket => self.parse_array(),
|
|
Token::Literal(l) => self.parse_literal_value(l),
|
|
Token::LeftBrace => self.parse_inline_table(),
|
|
Token::Dot | Token::Equals | Token::Comma | Token::RightBrace | Token::RightBracket => {
|
|
Err(ParserError::Expected("a value", token.value.name(true)).spanned(token.span))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_literal_value(
|
|
&mut self,
|
|
literal: &[u8],
|
|
) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let span = self.next(true)?.span;
|
|
let Ok(s) = std::str::from_utf8(literal) else {
|
|
return Err(ParserError::NonUtf8Literal.spanned(span));
|
|
};
|
|
if s == "true" {
|
|
return Ok(Value::Boolean(true).spanned(span));
|
|
}
|
|
if s == "false" {
|
|
return Ok(Value::Boolean(false).spanned(span));
|
|
}
|
|
let s = s.replace('_', "");
|
|
if let Ok(n) = i64::from_str(&s) {
|
|
return Ok(Value::Integer(n).spanned(span));
|
|
}
|
|
'radix: {
|
|
let b = s.as_bytes();
|
|
if b.len() >= 2 && b[0] == b'0' {
|
|
let radix = match b[1] {
|
|
b'x' => 16,
|
|
b'o' => 8,
|
|
b'b' => 2,
|
|
_ => break 'radix,
|
|
};
|
|
if let Ok(n) = i64::from_str_radix(&s[2..], radix) {
|
|
return Ok(Value::Integer(n).spanned(span));
|
|
}
|
|
}
|
|
}
|
|
if let Ok(n) = f64::from_str(&s) {
|
|
return Ok(Value::Float(n).spanned(span));
|
|
}
|
|
Err(ParserError::UnknownLiteral.spanned(span))
|
|
}
|
|
|
|
fn parse_literal_string(&mut self, s: &[u8]) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let span = self.next(true)?.span;
|
|
let s = s.as_bstr().to_string();
|
|
Ok(Value::String(s).spanned(span))
|
|
}
|
|
|
|
fn parse_cooked_string(&mut self, s: &[u8]) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let span = self.next(true)?.span;
|
|
let s = self.cook_string(s);
|
|
Ok(Value::String(s).spanned(span))
|
|
}
|
|
|
|
fn cook_string(&self, s: &[u8]) -> String {
|
|
use std::io::Write;
|
|
|
|
if !s.contains(&b'\\') {
|
|
return s.as_bstr().to_string();
|
|
}
|
|
let mut res = vec![];
|
|
let mut pos = 0;
|
|
while pos < s.len() {
|
|
let c = s[pos];
|
|
pos += 1;
|
|
match c {
|
|
b'\\' => {
|
|
let c = s[pos];
|
|
pos += 1;
|
|
match c {
|
|
b'\\' => res.push(b'\\'),
|
|
b'"' => res.push(b'"'),
|
|
b'b' => res.push(0x8),
|
|
b'f' => res.push(0xc),
|
|
b'n' => res.push(b'\n'),
|
|
b'r' => res.push(b'\r'),
|
|
b't' => res.push(b'\t'),
|
|
b'e' => res.push(0x1b),
|
|
b'x' | b'u' | b'U' => 'unicode: {
|
|
let len = match c {
|
|
b'x' => 2,
|
|
b'u' => 4,
|
|
_ => 8,
|
|
};
|
|
if s.len() - pos >= len {
|
|
if let Ok(s) = std::str::from_utf8(&s[pos..pos + len]) {
|
|
if let Ok(n) = u32::from_str_radix(s, 16) {
|
|
if let Some(c) = char::from_u32(n) {
|
|
pos += len;
|
|
let _ = write!(res, "{c}");
|
|
break 'unicode;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
res.extend_from_slice(&s[pos - 2..]);
|
|
}
|
|
b' ' | b'\t' | b'\n' => {
|
|
let mut t = pos;
|
|
let mut saw_nl = c == b'\n';
|
|
while t < s.len() && matches!(s[t], b' ' | b'\t' | b'\n') {
|
|
saw_nl |= s[t] == b'\n';
|
|
t += 1;
|
|
}
|
|
if saw_nl {
|
|
pos = t;
|
|
} else {
|
|
res.extend_from_slice(&[b'\\', c]);
|
|
}
|
|
}
|
|
_ => {
|
|
res.extend_from_slice(&[b'\\', c]);
|
|
}
|
|
}
|
|
}
|
|
_ => res.push(c),
|
|
}
|
|
}
|
|
res.as_bstr().to_string()
|
|
}
|
|
|
|
fn parse_array(&mut self) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let lo = self.next(true)?.span.lo;
|
|
let mut entries = vec![];
|
|
let mut consumed_comma = false;
|
|
loop {
|
|
if let Some(v) = self.lexer.peek(true) {
|
|
if v.value == Token::RightBracket {
|
|
let _ = self.next(true);
|
|
let hi = v.span.hi;
|
|
let span = Span { lo, hi };
|
|
return Ok(Value::Array(entries).spanned(span));
|
|
}
|
|
if entries.len() > 0 && !mem::take(&mut consumed_comma) {
|
|
self.error_handler.handle(
|
|
ParserError::Expected("`,` or `]`", v.value.name(true)).spanned(v.span),
|
|
);
|
|
}
|
|
}
|
|
match self.parse_value() {
|
|
Ok(v) => {
|
|
entries.push(v);
|
|
consumed_comma = self.skip_comma(true);
|
|
}
|
|
Err(e) => {
|
|
self.skip_tree(Token::LeftBracket, Token::RightBracket);
|
|
return Err(e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_inline_table(&mut self) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let lo = self.next(true)?.span.lo;
|
|
let mut map = IndexMap::new();
|
|
let mut consumed_comma = false;
|
|
loop {
|
|
let token = match self.peek(false) {
|
|
Ok(t) => t,
|
|
Err(e) => {
|
|
self.error_handler.handle(e);
|
|
break;
|
|
}
|
|
};
|
|
if token.value == Token::RightBrace {
|
|
let _ = self.next(false);
|
|
break;
|
|
}
|
|
if !map.is_empty() && !mem::take(&mut consumed_comma) {
|
|
self.error_handler.handle(
|
|
ParserError::Expected("`,` or `}`", token.value.name(false))
|
|
.spanned(token.span),
|
|
);
|
|
}
|
|
let res = match self.parse_key_value_with_recovery() {
|
|
Ok(res) => res,
|
|
Err(e) => {
|
|
self.skip_tree(Token::LeftBrace, Token::RightBrace);
|
|
return Err(e);
|
|
}
|
|
};
|
|
if let Some((mut key, value)) = res {
|
|
self.insert(&mut map, &mut key, value, false, false);
|
|
};
|
|
consumed_comma = self.skip_comma(false);
|
|
}
|
|
let hi = self.last_span().hi;
|
|
let span = Span { lo, hi };
|
|
Ok(Value::Table(map).spanned(span))
|
|
}
|
|
|
|
fn skip_comma(&mut self, value_context: bool) -> bool {
|
|
if let Some(token) = self.lexer.peek(value_context) {
|
|
if token.value != Token::Comma {
|
|
return false;
|
|
}
|
|
let _ = self.next(value_context);
|
|
}
|
|
while let Some(token) = self.lexer.peek(value_context) {
|
|
if token.value != Token::Comma {
|
|
break;
|
|
}
|
|
let _ = self.next(value_context);
|
|
self.error_handler
|
|
.handle(ParserError::UnnecessaryComma.spanned(token.span));
|
|
}
|
|
true
|
|
}
|
|
|
|
fn parse_document(&mut self) -> Result<Spanned<Value>, Spanned<ParserError>> {
|
|
let mut map = IndexMap::new();
|
|
self.parse_table_body(&mut map)?;
|
|
while self.lexer.peek(false).is_some() {
|
|
let (mut key, append) = self.parse_table_header()?;
|
|
let mut inner_map = IndexMap::new();
|
|
self.parse_table_body(&mut inner_map)?;
|
|
let value = Value::Table(inner_map).spanned(key.span);
|
|
self.insert(&mut map, &mut key.value, value, true, append);
|
|
}
|
|
let hi = self.last_span().hi;
|
|
let span = Span { lo: 0, hi };
|
|
Ok(Value::Table(map).spanned(span))
|
|
}
|
|
|
|
fn parse_table_header(&mut self) -> Result<(Spanned<Key>, bool), Spanned<ParserError>> {
|
|
let lo = self.next(false)?.span.lo;
|
|
let mut append = false;
|
|
if let Some(token) = self.lexer.peek(false) {
|
|
if token.value == Token::LeftBracket {
|
|
let _ = self.next(false);
|
|
append = true;
|
|
}
|
|
}
|
|
let key = self.parse_key()?;
|
|
let mut hi = self.parse_exact(Token::RightBracket, false)?.hi;
|
|
if append {
|
|
hi = self.parse_exact(Token::RightBracket, false)?.hi;
|
|
}
|
|
let span = Span { lo, hi };
|
|
Ok((key.spanned(span), append))
|
|
}
|
|
|
|
fn parse_table_body(
|
|
&mut self,
|
|
dst: &mut IndexMap<Spanned<String>, Spanned<Value>>,
|
|
) -> Result<(), Spanned<ParserError>> {
|
|
while let Some(e) = self.lexer.peek(false) {
|
|
if e.value == Token::LeftBracket {
|
|
return Ok(());
|
|
}
|
|
let Some((mut key, value)) = self.parse_key_value_with_recovery()? else {
|
|
continue;
|
|
};
|
|
self.insert(dst, &mut key, value, false, false);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn insert(
|
|
&self,
|
|
dst: &mut IndexMap<Spanned<String>, Spanned<Value>>,
|
|
keys: &mut Key,
|
|
value: Spanned<Value>,
|
|
modify_array_element: bool,
|
|
append_last: bool,
|
|
) {
|
|
let key = keys.pop_front().unwrap();
|
|
if keys.is_empty() {
|
|
if let RawEntryMut::Occupied(mut old) =
|
|
dst.raw_entry_mut_v1().from_key(key.value.as_str())
|
|
{
|
|
if append_last {
|
|
if let Value::Array(array) = &mut old.get_mut().value {
|
|
array.push(value);
|
|
return;
|
|
}
|
|
}
|
|
if let Value::Table(old) = &mut old.get_mut().value {
|
|
if let Value::Table(new) = value.value {
|
|
for (k, v) in new {
|
|
let mut keys = Key::new();
|
|
keys.push_back(k);
|
|
self.insert(old, &mut keys, v, false, false);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
self.error_handler
|
|
.redefinition(ParserError::Redefined.spanned(key.span), old.key().span);
|
|
old.shift_remove();
|
|
}
|
|
let span = value.span;
|
|
let value = match append_last {
|
|
true => Value::Array(vec![value]).spanned(span),
|
|
false => value,
|
|
};
|
|
dst.insert(key, value);
|
|
} else {
|
|
if let RawEntryMut::Occupied(mut o) = dst.raw_entry_mut_v1().from_key(&key) {
|
|
match &mut o.get_mut().value {
|
|
Value::Table(dst) => {
|
|
self.insert(dst, keys, value, modify_array_element, append_last);
|
|
return;
|
|
}
|
|
Value::Array(array) if modify_array_element => {
|
|
if let Some(Value::Table(dst)) =
|
|
array.last_mut().as_mut().map(|v| &mut v.value)
|
|
{
|
|
self.insert(dst, keys, value, modify_array_element, append_last);
|
|
return;
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
self.error_handler
|
|
.redefinition(ParserError::Redefined.spanned(key.span), o.key().span);
|
|
o.shift_remove();
|
|
}
|
|
let mut map = IndexMap::new();
|
|
let span = value.span;
|
|
self.insert(&mut map, keys, value, modify_array_element, append_last);
|
|
dst.insert(key, Value::Table(map).spanned(span));
|
|
}
|
|
}
|
|
|
|
fn parse_key_value_with_recovery(
|
|
&mut self,
|
|
) -> Result<Option<(Key, Spanned<Value>)>, Spanned<ParserError>> {
|
|
let pos = self.lexer.pos();
|
|
match self.parse_key_value() {
|
|
Ok(kv) => Ok(Some(kv)),
|
|
Err((e, key)) => {
|
|
if let Some(key) = key {
|
|
let span = key.back().unwrap().span;
|
|
self.error_handler
|
|
.handle(ParserError::IgnoringKey.spanned(span));
|
|
}
|
|
if self.lexer.pos() == pos {
|
|
Err(e)
|
|
} else {
|
|
self.error_handler.handle(e);
|
|
Ok(None)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[expect(clippy::type_complexity)]
|
|
fn parse_key_value(
|
|
&mut self,
|
|
) -> Result<(Key, Spanned<Value>), (Spanned<ParserError>, Option<Key>)> {
|
|
let key = self.parse_key();
|
|
let eq = self.parse_exact(Token::Equals, true);
|
|
let value = self.parse_value();
|
|
let key = match key {
|
|
Ok(k) => k,
|
|
Err(e) => return Err((e, None)),
|
|
};
|
|
if let Err(e) = eq {
|
|
return Err((e, Some(key)));
|
|
}
|
|
let value = match value {
|
|
Ok(v) => v,
|
|
Err(e) => return Err((e, Some(key))),
|
|
};
|
|
Ok((key, value))
|
|
}
|
|
|
|
fn parse_key(&mut self) -> Result<Key, Spanned<ParserError>> {
|
|
let mut parts = Key::new();
|
|
loop {
|
|
if parts.len() > 0 {
|
|
if self.parse_exact(Token::Dot, false).is_err() {
|
|
break;
|
|
}
|
|
}
|
|
let Some(token) = self.lexer.peek(false) else {
|
|
break;
|
|
};
|
|
let s = match token.value {
|
|
Token::LiteralString(s) => s.as_bstr().to_string(),
|
|
Token::CookedString(s) => self.cook_string(s),
|
|
Token::Literal(l) => l.as_bstr().to_string(),
|
|
_ => break,
|
|
};
|
|
parts.push_back(s.spanned(token.span));
|
|
let _ = self.next(false);
|
|
}
|
|
if parts.is_empty() {
|
|
Err(ParserError::MissingKey.spanned(self.next_span()))
|
|
} else {
|
|
Ok(parts)
|
|
}
|
|
}
|
|
|
|
fn parse_exact(
|
|
&mut self,
|
|
token: Token<'a>,
|
|
value_context: bool,
|
|
) -> Result<Span, Spanned<ParserError>> {
|
|
let actual = match self.peek(value_context) {
|
|
Ok(t) if t.value == token => {
|
|
let _ = self.next(value_context);
|
|
return Ok(t.span);
|
|
}
|
|
Ok(t) => t.value.name(value_context),
|
|
Err(_) => "end of file",
|
|
};
|
|
let span = self.next_span();
|
|
Err(ParserError::Expected(token.name(value_context), actual).spanned(span))
|
|
}
|
|
|
|
fn last_span(&self) -> Span {
|
|
self.last_span.unwrap_or(Span { lo: 0, hi: 0 })
|
|
}
|
|
|
|
fn next_span(&mut self) -> Span {
|
|
self.lexer.peek(false).map(|v| v.span).unwrap_or_else(|| {
|
|
let hi = self.last_span().hi;
|
|
Span { lo: hi, hi }
|
|
})
|
|
}
|
|
|
|
/// Discards tokens until the `end` token that balances an already consumed
/// `start` token has been consumed, or until the input ends.
///
/// Nested `start` / `end` pairs encountered on the way are skipped as a
/// whole.
fn skip_tree(&mut self, start: Token, end: Token) {
    // One `start` is already open when this is called.
    let mut depth = 1;
    loop {
        let Ok(token) = self.next(false) else {
            return;
        };
        if token.value == start {
            depth += 1;
            continue;
        }
        if token.value == end {
            depth -= 1;
            if depth == 0 {
                return;
            }
        }
    }
}
|
|
}
|