diff --git a/Cargo.lock b/Cargo.lock index 43cd50e9..dcbafa8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1012,6 +1012,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.38" @@ -1713,6 +1722,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wire-to-xml" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "quick-xml", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" diff --git a/Cargo.toml b/Cargo.toml index 30e475ab..ca184928 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ name = "jay" path = "src/main.rs" [workspace] -members = ["jay-config", "toml-config", "algorithms", "toml-spec"] +members = ["jay-config", "toml-config", "algorithms", "toml-spec", "wire-to-xml"] [profile.release] panic = "abort" diff --git a/build/wire.rs b/build/wire.rs index e6f81b1f..327b1fb3 100644 --- a/build/wire.rs +++ b/build/wire.rs @@ -1,570 +1,17 @@ +mod parser; + use { - crate::open, - anyhow::{Context, Result, bail}, + crate::{ + open, + wire::parser::{Field, Lined, Message, Type, parse_messages, to_camel}, + }, + anyhow::{Context, Result}, std::{fs::DirEntry, io::Write, os::unix::ffi::OsStrExt}, }; -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum TreeDelim { - Paren, - Brace, -} - -impl TreeDelim { - fn opening(self) -> u8 { - match self { - TreeDelim::Paren => b'(', - TreeDelim::Brace => b'{', - } - } - - fn closing(self) -> u8 { - match self { - TreeDelim::Paren => b')', - TreeDelim::Brace => b'}', - } - } -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum Symbol { - Comma, - Colon, - Equals, -} - -impl Symbol { - fn name(self) -> &'static str { - match self { - Symbol::Comma => "','", - Symbol::Colon => "':'", - Symbol::Equals => "'='", - } - } -} - -#[derive(Debug)] -struct Token<'a> { - line: u32, - kind: TokenKind<'a>, -} - -#[derive(Debug)] -enum TokenKind<'a> { - Ident(&'a str), - Num(u32), - Tree { - delim: TreeDelim, - body: Vec>, - }, - Symbol(Symbol), -} - -impl TokenKind<'_> { - fn name(&self) -> &str { - match self { - TokenKind::Ident(_) => "identifier", - TokenKind::Num(_) => "number", - TokenKind::Tree { delim, .. } => match delim { - TreeDelim::Paren => "'('-tree", - TreeDelim::Brace => "'{'-tree", - }, - TokenKind::Symbol(s) => s.name(), - } - } -} - -#[derive(Copy, Clone)] -struct Cursor<'a> { - pos: usize, - s: &'a [u8], -} - -impl Cursor<'_> { - fn eof(&self) -> bool { - self.pos >= self.s.len() - } -} - -fn tokenize<'a>(s: &'a [u8]) -> Result>> { - let mut tnz = Tokenizer { - line: 1, - cursor: Cursor { pos: 0, s }, - delim: None, - res: vec![], - }; - tnz.tokenize()?; - Ok(tnz.res) -} - -struct Tokenizer<'a> { - line: u32, - cursor: Cursor<'a>, - delim: Option, - res: Vec>, -} - -impl<'a> Tokenizer<'a> { - fn tokenize_one(&mut self) -> Result { - let c = &mut self.cursor; - while !c.eof() { - let b = c.s[c.pos]; - if matches!(b, b' ' | b'\n' | b'#') { - c.pos += 1; - if b == b'\n' { - self.line += 1; - } else if b == b'#' { - while !c.eof() { - c.pos += 1; - if c.s[c.pos - 1] == b'\n' { - self.line += 1; - break; - } - } - } - } else { - break; - } - } - if c.eof() { - if self.delim.is_some() { - bail!("Unexpected eof"); - } - return Ok(false); - } - let line = self.line; - let b = c.s[c.pos]; - let b_pos = c.pos; - c.pos += 1; - let kind = match b { - b'a'..=b'z' => { - while !c.eof() && matches!(c.s[c.pos], b'a'..=b'z' | b'_' | b'0'..=b'9') { - c.pos += 1; - } - TokenKind::Ident(std::str::from_utf8(&c.s[b_pos..c.pos])?) - } - b'0'..=b'9' => { - c.pos -= 1; - let mut num = 0; - while !c.eof() && matches!(c.s[c.pos], b'0'..=b'9') { - num = num * 10 + (c.s[c.pos] - b'0') as u32; - c.pos += 1; - } - TokenKind::Num(num) - } - b',' => TokenKind::Symbol(Symbol::Comma), - b'=' => TokenKind::Symbol(Symbol::Equals), - b':' => TokenKind::Symbol(Symbol::Colon), - b'(' => self.tokenize_tree(TreeDelim::Paren)?, - b'{' => self.tokenize_tree(TreeDelim::Brace)?, - c @ (b')' | b'}') => { - if self.delim.map(|d| d.closing()) != Some(c) { - bail!("Unexpected '{}' in line {}", c, self.line); - } - return Ok(false); - } - _ => bail!("Unexpected byte {:?} in line {}", b as char, self.line), - }; - self.res.push(Token { line, kind }); - Ok(true) - } - - fn tokenize(&mut self) -> Result<()> { - while self.tokenize_one()? { - // nothing - } - Ok(()) - } - - fn tokenize_tree(&mut self, delim: TreeDelim) -> Result> { - let mut tnz = Tokenizer { - line: self.line, - cursor: self.cursor, - delim: Some(delim), - res: vec![], - }; - tnz.tokenize().with_context(|| { - format!( - "While tokenizing {:?} block starting in line {}", - delim.opening() as char, - self.line - ) - })?; - self.cursor.pos = tnz.cursor.pos; - self.line = tnz.line; - Ok(TokenKind::Tree { - delim, - body: tnz.res, - }) - } -} - -#[derive(Debug)] -struct Lined { - #[expect(dead_code)] - line: u32, - val: T, -} - -#[derive(Debug)] -enum Type { - Id(String), - U32, - I32, - U64, - U64Rev, - Str, - OptStr, - BStr, - Fixed, - Fd, - Array(Box), - Pod(String), -} - -#[derive(Debug)] -struct Field { - name: String, - ty: Lined, -} - -#[derive(Debug)] -struct Message { - name: String, - camel_name: String, - safe_name: String, - id: u32, - fields: Vec>, - attribs: MessageAttribs, - has_reference_type: bool, -} - -#[derive(Debug, Default)] -struct MessageAttribs { - since: Option, -} - -struct Parser<'a> { - pos: usize, - tokens: &'a [Token<'a>], -} - -struct ParseResult { - requests: Vec>, - events: Vec>, -} - -impl<'a> Parser<'a> { - fn parse(&mut self) -> Result { - let mut requests = vec![]; - let mut events = vec![]; - while !self.eof() { - let (line, ty) = self.expect_ident()?; - let res = match ty.as_bytes() { - b"request" => &mut requests, - b"event" => &mut events, - _ => bail!("In line {}: Unexpected entry {:?}", line, ty), - }; - res.push(self.parse_message(res.len() as _)?); - } - Ok(ParseResult { requests, events }) - } - - fn eof(&self) -> bool { - self.pos == self.tokens.len() - } - - fn not_eof(&self) -> Result<()> { - if self.eof() { - bail!("Unexpected eof"); - } - Ok(()) - } - - fn yes_eof(&self) -> Result<()> { - if !self.eof() { - bail!( - "Unexpected trailing tokens in line {}", - self.tokens[self.pos].line - ); - } - Ok(()) - } - - fn parse_message_attribs(&mut self, attribs: &mut MessageAttribs) -> Result<()> { - let (_, tokens) = self.expect_tree(TreeDelim::Paren)?; - let mut parser = Parser { pos: 0, tokens }; - while !parser.eof() { - let (line, name) = parser.expect_ident()?; - parser.expect_symbol(Symbol::Equals)?; - match name { - "since" => attribs.since = Some(parser.expect_number()?.1), - _ => bail!("In line {}: Unexpected attribute {}", line, name), - } - } - Ok(()) - } - - fn parse_message(&mut self, id: u32) -> Result> { - let (line, name) = self.expect_ident()?; - let res: Result<_> = (|| { - self.not_eof()?; - let mut attribs = MessageAttribs::default(); - if let TokenKind::Tree { - delim: TreeDelim::Paren, - .. - } = self.tokens[self.pos].kind - { - self.parse_message_attribs(&mut attribs)?; - } - let (_, body) = self.expect_tree(TreeDelim::Brace)?; - let mut parser = Parser { - pos: 0, - tokens: body, - }; - let mut fields = vec![]; - while !parser.eof() { - fields.push(parser.parse_field()?); - } - let has_reference_type = fields.iter().any(|f| match &f.val.ty.val { - Type::OptStr | Type::Str | Type::BStr | Type::Array(..) => true, - _ => false, - }); - let safe_name = match name { - "move" => "move_", - "type" => "type_", - "drop" => "drop_", - "id" => "id_", - _ => name, - }; - Ok(Lined { - line, - val: Message { - name: name.to_owned(), - camel_name: to_camel(name), - safe_name: safe_name.to_string(), - id, - fields, - attribs, - has_reference_type, - }, - }) - })(); - res.with_context(|| format!("While parsing message starting at line {}", line)) - } - - fn parse_field(&mut self) -> Result> { - let (line, name) = self.expect_ident()?; - let res: Result<_> = (|| { - self.expect_symbol(Symbol::Colon)?; - let ty = self.parse_type()?; - if !self.eof() { - self.expect_symbol(Symbol::Comma)?; - } - Ok(Lined { - line, - val: Field { - name: name.to_owned(), - ty, - }, - }) - })(); - res.with_context(|| format!("While parsing field starting at line {}", line)) - } - - fn expect_ident(&mut self) -> Result<(u32, &'a str)> { - self.not_eof()?; - let token = &self.tokens[self.pos]; - self.pos += 1; - match &token.kind { - TokenKind::Ident(id) => Ok((token.line, *id)), - k => bail!( - "In line {}: Expected identifier, found {}", - token.line, - k.name() - ), - } - } - - fn expect_number(&mut self) -> Result<(u32, u32)> { - self.not_eof()?; - let token = &self.tokens[self.pos]; - self.pos += 1; - match &token.kind { - TokenKind::Num(n) => Ok((token.line, *n)), - k => bail!( - "In line {}: Expected number, found {}", - token.line, - k.name() - ), - } - } - - fn expect_symbol(&mut self, symbol: Symbol) -> Result<()> { - self.not_eof()?; - let token = &self.tokens[self.pos]; - self.pos += 1; - match &token.kind { - TokenKind::Symbol(s) if *s == symbol => Ok(()), - k => bail!( - "In line {}: Expected {}, found {}", - token.line, - symbol.name(), - k.name() - ), - } - } - - fn expect_tree_(&mut self) -> Result<(u32, TreeDelim, &'a [Token<'a>])> { - self.not_eof()?; - let token = &self.tokens[self.pos]; - self.pos += 1; - match &token.kind { - TokenKind::Tree { delim, body } => Ok((token.line, *delim, body)), - k => bail!("In line {}: Expected tree, found {}", token.line, k.name()), - } - } - - fn expect_tree(&mut self, exp_delim: TreeDelim) -> Result<(u32, &'a [Token<'a>])> { - let (line, delim, tokens) = self.expect_tree_()?; - if delim == exp_delim { - Ok((line, tokens)) - } else { - bail!( - "In line {}: Expected {:?}-delimited tree, found {:?}-delimited tree", - line, - exp_delim, - delim.opening() - ) - } - } - - fn parse_rust_path(&mut self) -> Result> { - let mut path = String::new(); - let mut line = None; - loop { - self.not_eof()?; - let (l, id) = self.expect_ident()?; - if line.is_none() { - line = Some(l); - } - path.push_str(id); - if self.eof() { - break; - } - self.expect_symbol(Symbol::Colon)?; - self.expect_symbol(Symbol::Colon)?; - path.push_str("::"); - } - Ok(Lined { - line: line.unwrap(), - val: path, - }) - } - - fn parse_type(&mut self) -> Result> { - self.not_eof()?; - let (line, ty) = self.expect_ident()?; - let ty = match ty.as_bytes() { - b"pod" => { - let (line, body) = self.expect_tree(TreeDelim::Paren)?; - let mut parser = Parser { - pos: 0, - tokens: body, - }; - let ty = parser.parse_rust_path().with_context(|| { - format!("While parsing pod element type starting in line {}", line) - })?; - Type::Pod(ty.val) - } - b"u64" => Type::U64, - b"u64_rev" => Type::U64Rev, - b"u32" => Type::U32, - b"i32" => Type::I32, - b"str" => Type::Str, - b"optstr" => Type::OptStr, - b"bstr" => Type::BStr, - b"fixed" => Type::Fixed, - b"fd" => Type::Fd, - b"array" => { - let (line, body) = self.expect_tree(TreeDelim::Paren)?; - let ty: Result<_> = (|| { - let mut parser = Parser { - pos: 0, - tokens: body, - }; - let ty = parser.parse_type()?; - parser.yes_eof()?; - match &ty.val { - Type::Id(_) => {} - Type::U32 => {} - Type::I32 => {} - Type::U64 => {} - Type::U64Rev => {} - Type::Fixed => {} - Type::Pod(..) => {} - _ => { - bail!("Only numerical and pod types can be array elements"); - } - } - Ok(ty) - })(); - let ty = ty.with_context(|| { - format!("While parsing array element type starting in line {}", line) - })?; - Type::Array(Box::new(ty.val)) - } - b"id" => { - let (_, body) = self.expect_tree(TreeDelim::Paren)?; - let ident: Result<_> = (|| { - let mut parser = Parser { - pos: 0, - tokens: body, - }; - let id = parser.expect_ident()?; - parser.yes_eof()?; - Ok(id) - })(); - let (_, ident) = ident.with_context(|| { - format!("While parsing identifier starting in line {}", line) - })?; - Type::Id(to_camel(ident)) - } - _ => bail!("Unknown type {}", ty), - }; - Ok(Lined { line, val: ty }) - } -} - -fn parse_messages(s: &[u8]) -> Result { - let tokens = tokenize(s)?; - let mut parser = Parser { - pos: 0, - tokens: &tokens, - }; - parser.parse() -} - -fn to_camel(s: &str) -> String { - let mut last_was_underscore = true; - let mut res = String::new(); - for mut b in s.as_bytes().iter().copied() { - if b == b'_' { - last_was_underscore = true; - } else { - if last_was_underscore { - b = b.to_ascii_uppercase() - } - res.push(b as char); - last_was_underscore = false; - } - } - res -} - fn write_type(f: &mut W, ty: &Type) -> Result<()> { match ty { - Type::Id(id) => write!(f, "{}Id", id)?, + Type::Id(_, id) => write!(f, "{}Id", id)?, Type::U32 => write!(f, "u32")?, Type::I32 => write!(f, "i32")?, Type::U64 => write!(f, "u64")?, @@ -667,7 +114,7 @@ fn write_message(f: &mut W, obj: &str, message: &Message) -> Result<() writeln!(f, " self_id: {}Id::NONE,", obj)?; for field in &message.fields { let p = match &field.val.ty.val { - Type::Id(_) => "object", + Type::Id(..) => "object", Type::U32 => "uint", Type::I32 => "int", Type::U64 => "u64", @@ -694,7 +141,7 @@ fn write_message(f: &mut W, obj: &str, message: &Message) -> Result<() writeln!(f, " fmt.header(self.self_id, {});", uppercase)?; fn write_fmt_expr(f: &mut W, prefix: &str, ty: &Type, access: &str) -> Result<()> { let p = match ty { - Type::Id(_) => "object", + Type::Id(..) => "object", Type::U32 => "uint", Type::I32 => "int", Type::U64 => "u64", diff --git a/build/wire/parser.rs b/build/wire/parser.rs new file mode 100644 index 00000000..e8e8b822 --- /dev/null +++ b/build/wire/parser.rs @@ -0,0 +1,605 @@ +use anyhow::{Context, Result, bail}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum TreeDelim { + Paren, + Brace, +} + +impl TreeDelim { + fn opening(self) -> u8 { + match self { + TreeDelim::Paren => b'(', + TreeDelim::Brace => b'{', + } + } + + fn closing(self) -> u8 { + match self { + TreeDelim::Paren => b')', + TreeDelim::Brace => b'}', + } + } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Symbol { + Comma, + Colon, + Equals, +} + +impl Symbol { + fn name(self) -> &'static str { + match self { + Symbol::Comma => "','", + Symbol::Colon => "':'", + Symbol::Equals => "'='", + } + } +} + +#[derive(Debug)] +struct Token<'a> { + line: u32, + kind: TokenKind<'a>, +} + +#[derive(Debug)] +enum TokenKind<'a> { + Ident(&'a str), + Num(u32), + Tree { + delim: TreeDelim, + body: Vec>, + }, + Symbol(Symbol), +} + +impl TokenKind<'_> { + fn name(&self) -> &str { + match self { + TokenKind::Ident(_) => "identifier", + TokenKind::Num(_) => "number", + TokenKind::Tree { delim, .. } => match delim { + TreeDelim::Paren => "'('-tree", + TreeDelim::Brace => "'{'-tree", + }, + TokenKind::Symbol(s) => s.name(), + } + } +} + +#[derive(Copy, Clone)] +struct Cursor<'a> { + pos: usize, + s: &'a [u8], +} + +impl Cursor<'_> { + fn eof(&self) -> bool { + self.pos >= self.s.len() + } +} + +fn tokenize<'a>(s: &'a [u8]) -> Result>> { + let mut tnz = Tokenizer { + line: 1, + cursor: Cursor { pos: 0, s }, + delim: None, + res: vec![], + }; + tnz.tokenize()?; + Ok(tnz.res) +} + +struct Tokenizer<'a> { + line: u32, + cursor: Cursor<'a>, + delim: Option, + res: Vec>, +} + +impl<'a> Tokenizer<'a> { + fn tokenize_one(&mut self) -> Result { + let c = &mut self.cursor; + while !c.eof() { + let b = c.s[c.pos]; + if matches!(b, b' ' | b'\n' | b'#') { + c.pos += 1; + if b == b'\n' { + self.line += 1; + } else if b == b'#' { + while !c.eof() { + c.pos += 1; + if c.s[c.pos - 1] == b'\n' { + self.line += 1; + break; + } + } + } + } else { + break; + } + } + if c.eof() { + if self.delim.is_some() { + bail!("Unexpected eof"); + } + return Ok(false); + } + let line = self.line; + let b = c.s[c.pos]; + let b_pos = c.pos; + c.pos += 1; + let kind = match b { + b'a'..=b'z' => { + while !c.eof() && matches!(c.s[c.pos], b'a'..=b'z' | b'_' | b'0'..=b'9') { + c.pos += 1; + } + TokenKind::Ident(std::str::from_utf8(&c.s[b_pos..c.pos])?) + } + b'0'..=b'9' => { + c.pos -= 1; + let mut num = 0; + while !c.eof() && matches!(c.s[c.pos], b'0'..=b'9') { + num = num * 10 + (c.s[c.pos] - b'0') as u32; + c.pos += 1; + } + TokenKind::Num(num) + } + b',' => TokenKind::Symbol(Symbol::Comma), + b'=' => TokenKind::Symbol(Symbol::Equals), + b':' => TokenKind::Symbol(Symbol::Colon), + b'(' => self.tokenize_tree(TreeDelim::Paren)?, + b'{' => self.tokenize_tree(TreeDelim::Brace)?, + c @ (b')' | b'}') => { + if self.delim.map(|d| d.closing()) != Some(c) { + bail!("Unexpected '{}' in line {}", c, self.line); + } + return Ok(false); + } + _ => bail!("Unexpected byte {:?} in line {}", b as char, self.line), + }; + self.res.push(Token { line, kind }); + Ok(true) + } + + fn tokenize(&mut self) -> Result<()> { + while self.tokenize_one()? { + // nothing + } + Ok(()) + } + + fn tokenize_tree(&mut self, delim: TreeDelim) -> Result> { + let mut tnz = Tokenizer { + line: self.line, + cursor: self.cursor, + delim: Some(delim), + res: vec![], + }; + tnz.tokenize().with_context(|| { + format!( + "While tokenizing {:?} block starting in line {}", + delim.opening() as char, + self.line + ) + })?; + self.cursor.pos = tnz.cursor.pos; + self.line = tnz.line; + Ok(TokenKind::Tree { + delim, + body: tnz.res, + }) + } +} + +#[derive(Debug)] +pub struct Lined { + #[expect(dead_code)] + pub line: u32, + pub val: T, +} + +#[derive(Debug)] +pub enum Type { + Id(#[allow(dead_code)] String, String), + U32, + I32, + U64, + U64Rev, + Str, + OptStr, + BStr, + Fixed, + Fd, + Array(Box), + Pod(String), +} + +#[derive(Debug)] +pub struct Field { + pub name: String, + pub ty: Lined, + #[allow(dead_code)] + pub attribs: FieldAttribs, +} + +#[derive(Debug)] +pub struct Message { + pub name: String, + pub camel_name: String, + pub safe_name: String, + pub id: u32, + pub fields: Vec>, + pub attribs: MessageAttribs, + pub has_reference_type: bool, +} + +#[derive(Debug, Default)] +pub struct MessageAttribs { + pub since: Option, + pub destructor: bool, +} + +#[derive(Debug, Default)] +pub struct FieldAttribs { + pub new: bool, + pub nullable: bool, +} + +struct Parser<'a> { + pos: usize, + tokens: &'a [Token<'a>], +} + +#[derive(Debug)] +pub struct ParseResult { + pub requests: Vec>, + pub events: Vec>, +} + +impl<'a> Parser<'a> { + fn parse(&mut self) -> Result { + let mut requests = vec![]; + let mut events = vec![]; + while !self.eof() { + let (line, ty) = self.expect_ident()?; + let res = match ty.as_bytes() { + b"request" => &mut requests, + b"event" => &mut events, + _ => bail!("In line {}: Unexpected entry {:?}", line, ty), + }; + res.push(self.parse_message(res.len() as _)?); + } + Ok(ParseResult { requests, events }) + } + + fn eof(&self) -> bool { + self.pos == self.tokens.len() + } + + fn not_eof(&self) -> Result<()> { + if self.eof() { + bail!("Unexpected eof"); + } + Ok(()) + } + + fn yes_eof(&self) -> Result<()> { + if !self.eof() { + bail!( + "Unexpected trailing tokens in line {}", + self.tokens[self.pos].line + ); + } + Ok(()) + } + + fn parse_message_attribs(&mut self, attribs: &mut MessageAttribs) -> Result<()> { + let (_, tokens) = self.expect_tree(TreeDelim::Paren)?; + let mut parser = Parser { pos: 0, tokens }; + while !parser.eof() { + let (line, name) = parser.expect_ident()?; + match name { + "since" => { + parser.expect_symbol(Symbol::Equals)?; + attribs.since = Some(parser.expect_number()?.1) + } + "destructor" => { + attribs.destructor = true; + } + _ => bail!("In line {}: Unexpected attribute {}", line, name), + } + if !parser.eof() { + parser.expect_symbol(Symbol::Comma)?; + } + } + Ok(()) + } + + fn parse_message(&mut self, id: u32) -> Result> { + let (line, name) = self.expect_ident()?; + let res: Result<_> = (|| { + self.not_eof()?; + let mut attribs = MessageAttribs::default(); + if let TokenKind::Tree { + delim: TreeDelim::Paren, + .. + } = self.tokens[self.pos].kind + { + self.parse_message_attribs(&mut attribs)?; + } + let (_, body) = self.expect_tree(TreeDelim::Brace)?; + let mut parser = Parser { + pos: 0, + tokens: body, + }; + let mut fields = vec![]; + while !parser.eof() { + fields.push(parser.parse_field()?); + } + let has_reference_type = fields.iter().any(|f| match &f.val.ty.val { + Type::OptStr | Type::Str | Type::BStr | Type::Array(..) => true, + _ => false, + }); + let safe_name = match name { + "move" => "move_", + "type" => "type_", + "drop" => "drop_", + "id" => "id_", + _ => name, + }; + Ok(Lined { + line, + val: Message { + name: name.to_owned(), + camel_name: to_camel(name), + safe_name: safe_name.to_string(), + id, + fields, + attribs, + has_reference_type, + }, + }) + })(); + res.with_context(|| format!("While parsing message starting at line {}", line)) + } + + fn parse_field_attribs(&mut self, attribs: &mut FieldAttribs) -> Result<()> { + let (_, tokens) = self.expect_tree(TreeDelim::Paren)?; + let mut parser = Parser { pos: 0, tokens }; + while !parser.eof() { + let (line, name) = parser.expect_ident()?; + match name { + "new" => attribs.new = true, + "nullable" => attribs.nullable = true, + _ => bail!("In line {}: Unexpected attribute {}", line, name), + } + if !parser.eof() { + parser.expect_symbol(Symbol::Comma)?; + } + } + Ok(()) + } + + fn parse_field(&mut self) -> Result> { + let (line, name) = self.expect_ident()?; + let res: Result<_> = (|| { + self.expect_symbol(Symbol::Colon)?; + let ty = self.parse_type()?; + let mut attribs = FieldAttribs::default(); + if !self.eof() { + if let TokenKind::Tree { + delim: TreeDelim::Paren, + .. + } = self.tokens[self.pos].kind + { + self.parse_field_attribs(&mut attribs)?; + } + } + if !self.eof() { + self.expect_symbol(Symbol::Comma)?; + } + Ok(Lined { + line, + val: Field { + name: name.to_owned(), + ty, + attribs, + }, + }) + })(); + res.with_context(|| format!("While parsing field starting at line {}", line)) + } + + fn expect_ident(&mut self) -> Result<(u32, &'a str)> { + self.not_eof()?; + let token = &self.tokens[self.pos]; + self.pos += 1; + match &token.kind { + TokenKind::Ident(id) => Ok((token.line, *id)), + k => bail!( + "In line {}: Expected identifier, found {}", + token.line, + k.name() + ), + } + } + + fn expect_number(&mut self) -> Result<(u32, u32)> { + self.not_eof()?; + let token = &self.tokens[self.pos]; + self.pos += 1; + match &token.kind { + TokenKind::Num(n) => Ok((token.line, *n)), + k => bail!( + "In line {}: Expected number, found {}", + token.line, + k.name() + ), + } + } + + fn expect_symbol(&mut self, symbol: Symbol) -> Result<()> { + self.not_eof()?; + let token = &self.tokens[self.pos]; + self.pos += 1; + match &token.kind { + TokenKind::Symbol(s) if *s == symbol => Ok(()), + k => bail!( + "In line {}: Expected {}, found {}", + token.line, + symbol.name(), + k.name() + ), + } + } + + fn expect_tree_(&mut self) -> Result<(u32, TreeDelim, &'a [Token<'a>])> { + self.not_eof()?; + let token = &self.tokens[self.pos]; + self.pos += 1; + match &token.kind { + TokenKind::Tree { delim, body } => Ok((token.line, *delim, body)), + k => bail!("In line {}: Expected tree, found {}", token.line, k.name()), + } + } + + fn expect_tree(&mut self, exp_delim: TreeDelim) -> Result<(u32, &'a [Token<'a>])> { + let (line, delim, tokens) = self.expect_tree_()?; + if delim == exp_delim { + Ok((line, tokens)) + } else { + bail!( + "In line {}: Expected {:?}-delimited tree, found {:?}-delimited tree", + line, + exp_delim, + delim.opening() + ) + } + } + + fn parse_rust_path(&mut self) -> Result> { + let mut path = String::new(); + let mut line = None; + loop { + self.not_eof()?; + let (l, id) = self.expect_ident()?; + if line.is_none() { + line = Some(l); + } + path.push_str(id); + if self.eof() { + break; + } + self.expect_symbol(Symbol::Colon)?; + self.expect_symbol(Symbol::Colon)?; + path.push_str("::"); + } + Ok(Lined { + line: line.unwrap(), + val: path, + }) + } + + fn parse_type(&mut self) -> Result> { + self.not_eof()?; + let (line, ty) = self.expect_ident()?; + let ty = match ty.as_bytes() { + b"pod" => { + let (line, body) = self.expect_tree(TreeDelim::Paren)?; + let mut parser = Parser { + pos: 0, + tokens: body, + }; + let ty = parser.parse_rust_path().with_context(|| { + format!("While parsing pod element type starting in line {}", line) + })?; + Type::Pod(ty.val) + } + b"u64" => Type::U64, + b"u64_rev" => Type::U64Rev, + b"u32" => Type::U32, + b"i32" => Type::I32, + b"str" => Type::Str, + b"optstr" => Type::OptStr, + b"bstr" => Type::BStr, + b"fixed" => Type::Fixed, + b"fd" => Type::Fd, + b"array" => { + let (line, body) = self.expect_tree(TreeDelim::Paren)?; + let ty: Result<_> = (|| { + let mut parser = Parser { + pos: 0, + tokens: body, + }; + let ty = parser.parse_type()?; + parser.yes_eof()?; + match &ty.val { + Type::Id(..) => {} + Type::U32 => {} + Type::I32 => {} + Type::U64 => {} + Type::U64Rev => {} + Type::Fixed => {} + Type::Pod(..) => {} + _ => { + bail!("Only numerical and pod types can be array elements"); + } + } + Ok(ty) + })(); + let ty = ty.with_context(|| { + format!("While parsing array element type starting in line {}", line) + })?; + Type::Array(Box::new(ty.val)) + } + b"id" => { + let (_, body) = self.expect_tree(TreeDelim::Paren)?; + let ident: Result<_> = (|| { + let mut parser = Parser { + pos: 0, + tokens: body, + }; + let id = parser.expect_ident()?; + parser.yes_eof()?; + Ok(id) + })(); + let (_, ident) = ident.with_context(|| { + format!("While parsing identifier starting in line {}", line) + })?; + Type::Id(ident.to_owned(), to_camel(ident)) + } + _ => bail!("Unknown type {}", ty), + }; + Ok(Lined { line, val: ty }) + } +} + +pub fn parse_messages(s: &[u8]) -> Result { + let tokens = tokenize(s)?; + let mut parser = Parser { + pos: 0, + tokens: &tokens, + }; + parser.parse() +} + +pub fn to_camel(s: &str) -> String { + let mut last_was_underscore = true; + let mut res = String::new(); + for mut b in s.as_bytes().iter().copied() { + if b == b'_' { + last_was_underscore = true; + } else { + if last_was_underscore { + b = b.to_ascii_uppercase() + } + res.push(b as char); + last_was_underscore = false; + } + } + res +} diff --git a/wire-to-xml/Cargo.toml b/wire-to-xml/Cargo.toml new file mode 100644 index 00000000..c8c96328 --- /dev/null +++ b/wire-to-xml/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "wire-to-xml" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive", "wrap_help"] } +quick-xml = "0.38.0" diff --git a/wire-to-xml/src/main.rs b/wire-to-xml/src/main.rs new file mode 100644 index 00000000..34ccbb92 --- /dev/null +++ b/wire-to-xml/src/main.rs @@ -0,0 +1,180 @@ +use { + crate::parser::{Type, parse_messages}, + clap::Parser, + quick_xml::events::{BytesDecl, BytesText, Event}, + std::{io, os::unix::ffi::OsStrExt, path::PathBuf}, +}; + +#[path = "../../build/wire/parser.rs"] +#[allow(dead_code)] +mod parser; + +#[derive(Parser, Debug)] +struct Cli { + protocol: String, + files: Vec, +} + +fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + let mut writer = quick_xml::Writer::new_with_indent(io::stdout().lock(), b' ', 2); + writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)))?; + writer + .create_element("protocol") + .with_attribute(("name", &*cli.protocol)) + .write_inner_content(|w| { + w.create_element("copyright").write_inner_content(|w| { + for line in COPYRIGHT.lines() { + w.write_indent()?; + w.write_event(Event::Text(BytesText::new(line)))?; + } + Ok(()) + })?; + w.create_element("description") + .with_attribute(("summary", "")) + .write_empty()?; + for f in &cli.files { + let res = parse_messages(std::fs::read(f)?.as_slice()) + .map_err(|e| io::Error::other(e))?; + let if_name = f.file_stem().unwrap(); + let version = res + .events + .iter() + .chain(res.requests.iter()) + .map(|e| e.val.attribs.since.unwrap_or(1)) + .max() + .unwrap_or(1); + w.create_element("interface") + .with_attribute((&b"name"[..], if_name.as_bytes())) + .with_attribute(("version", &*version.to_string())) + .write_inner_content(|w| { + w.create_element("description") + .with_attribute(("summary", "")) + .write_empty()?; + for (ty, messages) in [("request", &res.requests), ("event", &res.events)] { + for message in messages { + let mut el = w + .create_element(ty) + .with_attribute(("name", &*message.val.name)); + if let Some(since) = message.val.attribs.since { + el = el.with_attribute(("since", &*since.to_string())); + } + if message.val.attribs.destructor { + el = el.with_attribute(("type", "destructor")); + } + el.write_inner_content(|w| { + w.create_element("description") + .with_attribute(("summary", "")) + .write_empty()?; + let mut i = 0; + while i < message.val.fields.len() { + let j = i + 2; + if j < message.val.fields.len() { + if let Type::Id(name, _) = + &message.val.fields[j].val.ty.val + { + if name == "object" { + i = j; + } + } + } + let field = &message.val.fields[i]; + let mut el = w.create_element("arg"); + macro_rules! simple { + ($ty:expr) => { + el = el + .with_attribute(("name", &*field.val.name)) + .with_attribute(("type", $ty)); + }; + } + match &field.val.ty.val { + Type::Id(name, _) => { + let ty = match field.val.attribs.new { + true => "new_id", + false => "object", + }; + simple!(ty); + if name != "object" { + el = el.with_attribute(("interface", &**name)); + } + if field.val.attribs.nullable { + el = el.with_attribute(("allow-null", "true")); + } + } + Type::U32 => { + simple!("uint"); + } + Type::I32 => { + simple!("int"); + } + t @ Type::U64 | t @ Type::U64Rev => { + let mut suf = ["hi", "lo"]; + if let Type::U64Rev = t { + suf = ["lo", "hi"]; + } + el.with_attribute(( + "name", + &*format!("{}_{}", field.val.name, suf[0]), + )) + .with_attribute(("type", "uint")) + .with_attribute(("description", "")) + .write_empty()?; + el = w + .create_element("arg") + .with_attribute(( + "name", + &*format!("{}_{}", field.val.name, suf[1]), + )) + .with_attribute(("type", "uint")); + } + Type::Str | Type::BStr => { + simple!("string"); + } + Type::OptStr => { + simple!("string"); + el = el.with_attribute(("allow-null", "true")); + } + Type::Fixed => { + simple!("fixed"); + } + Type::Fd => { + simple!("fd"); + } + Type::Array(_) | Type::Pod(_) => { + simple!("array"); + } + } + el.with_attribute(("description", "")).write_empty()?; + i += 1; + } + Ok(()) + })?; + } + } + Ok(()) + })?; + } + Ok(()) + })?; + Ok(()) +} + +const COPYRIGHT: &str = r#"Copyright 20XX YY + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +"#;