From a45eda2bd4230f2f0c00deb3e86b894d51b30bad Mon Sep 17 00:00:00 2001 From: hellerve Date: Fri, 25 Aug 2017 17:27:57 +0200 Subject: [PATCH] can now convert from html to ir and back --- Cargo.toml | 5 +- README.md | 2 +- src/css.rs | 283 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/dom.rs | 45 ++++++--- src/html.rs | 86 +++++++++------- 5 files changed, 363 insertions(+), 58 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1e87aad..1778c00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,11 @@ [package] -name = "b" +name = "r" version = "0.1.0" authors = ["hellerve "] [[bin]] -name = "b" +name = "r" path = "src/main.rs" [dependencies] +getopts = "0.2.3" diff --git a/README.md b/README.md index 0314970..081a10d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# b +# r A browser engine written in Rust. I heard that’s what the cool kids do these days. diff --git a/src/css.rs b/src/css.rs index 4777a3e..abb4e6c 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,7 +1,4 @@ -enum Selector { - Simple(SimpleSelector), - Chain(ChainSelector), -} +use std; struct SimpleSelector { tag_name: Option, @@ -15,10 +12,57 @@ struct ChainSelector { class: Vec>, } +enum Selector { + Simple(SimpleSelector), + //Chain(ChainSelector), +} + +pub type Specificity = (usize, usize, usize); + +impl Selector { + pub fn specificity(&self) -> Specificity { + let Selector::Simple(ref simple) = *self; + let a = simple.id.iter().count(); + let b = simple.class.len(); + let c = simple.tag_name.iter().count(); + (a, b, c) + } +} + +impl std::fmt::Display for Selector { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + Selector::Simple(ref s) => { + let tag = s.tag_name.clone().unwrap_or("".to_string()); + let id = s.id.clone().unwrap_or("".to_string()); + write!(f, "{}{}{}", tag, + id, + s.class.join(" ")) + } + } + } +} + enum Unit { Px, Em, Rm, + Vw, + Vh, + Perc +} + +impl std::fmt::Display for Unit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + Unit::Px => write!(f, "px"), + Unit::Em => write!(f, "em"), + Unit::Rm => write!(f, "rm"), + Unit::Vw => write!(f, "vw"), + Unit::Vh => write!(f, "vh"), + Unit::Perc => write!(f, "%"), + } + } } struct Color { @@ -34,16 +78,245 @@ enum Value { ColorValue(Color), } +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + Value::Keyword(ref s) => write!(f, "{}", s), + Value::Length(n, ref u) => write!(f, "{}{}", n, u), + Value::ColorValue(ref c) => write!(f, "rgba({}, {}, {}, {})", c.r, c.g, c.b, c.a), + } + } +} + struct Declaration { name: String, value: Value, } +impl std::fmt::Display for Declaration { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}: {};", self.name, self.value) + } +} + struct Rule { selectors: Vec, declarations: Vec, } -struct Stylesheet { +impl std::fmt::Display for Rule { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut selector = "".to_string(); + for s in self.selectors.iter() { + if selector.len() == 0 { + selector = format!("{}", s); + } else { + selector = format!("{}, {}", selector, s); + } + } + write!(f, "{}", selector); + write!(f, "{{ "); + for decl in self.declarations.iter() { + write!(f, "{}", decl); + } + write!(f, " }}") + } +} + +pub struct Stylesheet { rules: Vec, } + +impl std::fmt::Display for Stylesheet { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for rule in self.rules.iter() { + write!(f, "{}", rule); + } + return Ok(()) + } +} + +struct Parser { + pos: usize, + input: String, +} + +impl Parser { + fn parse_rules(&mut self) -> Vec { + let mut rules = Vec::new(); + loop { + self.consume_ws(); + if self.eof() { break } + rules.push(self.parse_rule()); + } + rules + } + + fn parse_rule(&mut self) -> Rule { + Rule { + selectors: self.parse_selectors(), + declarations: self.parse_declarations(), + } + } + + fn parse_selectors(&mut self) -> Vec { + let mut selectors = Vec::new(); + loop { + selectors.push(Selector::Simple(self.parse_simple_selector())); + self.consume_ws(); + match self.next() { + ',' => { self.consume(); self.consume_ws(); } + '{' => break, + c => panic!("Unexpected character {} in selector list", c) + } + } + selectors.sort_by(|a,b| b.specificity().cmp(&a.specificity())); + selectors + } + + fn parse_simple_selector(&mut self) -> SimpleSelector { + let mut selector = SimpleSelector { tag_name: None, id: None, class: Vec::new() }; + while !self.eof() { + match self.next() { + '#' => { + self.consume(); + selector.id = Some(self.parse_id()); + } + '.' => { + self.consume(); + selector.class.push(self.parse_id()); + } + '*' => { + self.consume(); + } + c if valid_identifier_char(c) => { + selector.tag_name = Some(self.parse_id()); + } + _ => break + } + } + selector + } + + fn parse_declarations(&mut self) -> Vec { + assert!(self.consume() == '{'); + let mut declarations = Vec::new(); + loop { + self.consume_ws(); + if self.next() == '}' { + self.consume(); + break; + } + declarations.push(self.parse_declaration()); + } + declarations + } + + fn parse_declaration(&mut self) -> Declaration { + let property_name = self.parse_id(); + self.consume_ws(); + assert!(self.consume() == ':'); + self.consume_ws(); + let value = self.parse_value(); + self.consume_ws(); + assert!(self.consume() == ';'); + + Declaration { + name: property_name, + value: value, + } + } + + fn parse_value(&mut self) -> Value { + match self.next() { + '0'...'9' => self.parse_length(), + '#' => self.parse_color(), + _ => Value::Keyword(self.parse_id()) + } + } + + fn parse_length(&mut self) -> Value { + Value::Length(self.parse_float(), self.parse_unit()) + } + + fn parse_float(&mut self) -> f32 { + let s = self.consume_while(|c| match c { + '0'...'9' | '.' => true, + _ => false + }); + s.parse().unwrap() + } + + fn parse_unit(&mut self) -> Unit { + match &*self.parse_id().to_lowercase() { + "px" => Unit::Px, + "em" => Unit::Em, + "rm" => Unit::Rm, + "vw" => Unit::Vw, + "vh" => Unit::Vh, + "%" => Unit::Perc, + _ => panic!("unrecognized unit") + } + } + + fn parse_color(&mut self) -> Value { + assert!(self.consume() == '#'); + Value::ColorValue(Color { + r: self.parse_hex_pair(), + g: self.parse_hex_pair(), + b: self.parse_hex_pair(), + a: 255 }) + } + + fn parse_hex_pair(&mut self) -> u8 { + let s = &self.input[self.pos .. self.pos + 2]; + self.pos += 2; + u8::from_str_radix(s, 16).unwrap() + } + + fn parse_id(&mut self) -> String { + self.consume_while(valid_identifier_char) + } + + fn consume_ws(&mut self) { + self.consume_while(char::is_whitespace); + } + + fn consume_while(&mut self, test: F) -> String + where F: Fn(char) -> bool { + let mut result = String::new(); + while !self.eof() && test(self.next()) { + result.push(self.consume()); + } + result + } + + fn consume(&mut self) -> char { + let mut iter = self.input[self.pos..].char_indices(); + let (_, cur_char) = iter.next().unwrap(); + let (next_pos, _) = iter.next().unwrap_or((1, ' ')); + self.pos += next_pos; + cur_char + } + + fn next(&self) -> char { + self.input[self.pos..].chars().next().unwrap() + } + + fn eof(&self) -> bool { + self.pos >= self.input.len() + } +} + +fn valid_identifier_char(c: char) -> bool { + match c { + 'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true, + _ => false, + } +} + +pub fn parse(source: String) -> Stylesheet { + let mut parser = Parser { pos: 0, input: source }; + Stylesheet { rules: parser.parse_rules() } +} + diff --git a/src/dom.rs b/src/dom.rs index 1eebd0d..d63b07c 100644 --- a/src/dom.rs +++ b/src/dom.rs @@ -1,7 +1,9 @@ use std::collections::HashMap; +use std; +use css; -struct Attr { +pub struct Attr { attrs: HashMap, } @@ -10,27 +12,32 @@ struct EData { attr: Attr, } +struct SData { + attr: Attr, + content: css::Stylesheet, +} + enum NType { Text(String), Comment(String), Element(EData), + Stylesheet(SData) } - -struct Node { +pub struct Node { children: Vec, ntype: NType, } -fn text(d: String) -> Node { +pub fn text(d: String) -> Node { Node { children: Vec::new(), ntype: NType::Text(d) } } -fn comment(d: String) -> Node { +pub fn comment(d: String) -> Node { Node { children: Vec::new(), ntype: NType::Comment(d) } } -fn elem(name: String, attr: Attr, children: Vec) -> Node { +pub fn elem(name: String, attr: Attr, children: Vec) -> Node { Node { children: children, ntype: NType::Element(EData { @@ -40,20 +47,35 @@ fn elem(name: String, attr: Attr, children: Vec) -> Node { } } +pub fn attr(attrs: HashMap) -> Attr { + Attr { attrs } +} + +pub fn style(style: String, attr: Attr) -> Node { + Node { + children: Vec::new(), + ntype: NType::Stylesheet(SData { + content: css::parse(style), + attr: attr, + }) + } +} + impl std::fmt::Display for Node { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self.ntype { NType::Text(ref s) => write!(f, "{}", s), NType::Comment(ref s) => write!(f, "", s), NType::Element(ref d) => { - write!(f, "<{}", d.name); - write!(f, "{}", d.attr); - write!(f, ">"); + write!(f, "<{}{}>", d.name, d.attr); for child in self.children.iter() { write!(f, "{}", child); } write!(f, "", d.name) }, + NType::Stylesheet(ref s) => { + write!(f, "", s.attr, s.content) + } } } } @@ -66,8 +88,3 @@ impl std::fmt::Display for Attr { return Result::Ok(()) } } - -/*fn main() { - let e = elem("html".to_string(), Attr{attrs:"ab".chars().map(|c| (c.to_string(), c.to_string())).collect::>()}, vec![elem("body".to_string(), Attr{attrs:HashMap::new()}, vec![text("hi".to_string())])]); - println!("{}", e) -}*/ diff --git a/src/html.rs b/src/html.rs index a393b10..2bf9b27 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,3 +1,7 @@ +use std::collections::HashMap; + +use dom; + struct Parser { pos: usize, input: String, @@ -29,20 +33,20 @@ impl Parser { } fn consume_while(&mut self, test: F) -> String - where F: Fn(char) -> bool { + where F: Fn(char, &mut Parser) -> bool { let mut result = String::new(); - while !self.eof() && test(self.next()) { + while !self.eof() && test(self.next(), self) { result.push(self.consume()); } return result; } fn consume_ws(&mut self) { - self.consume_while(CharExt::is_whitespace); + self.consume_while(|x, _| char::is_whitespace(x)); } fn parse_tag_name(&mut self) -> String { - self.consume_while(|c| match c { + self.consume_while(|c, _| match c { 'a'...'z' | 'A'...'Z' | '0'...'9' => true, _ => false, }) @@ -60,63 +64,73 @@ impl Parser { } fn parse_text(&mut self) -> dom::Node { - dom::text(self.consume_while(|c| c != '<')) + dom::text(self.consume_while(|c, _| c != '<')) } fn parse_element(&mut self) -> dom::Node { - // Opening tag. - assert!(self.consume_char() == '<'); + assert!(self.consume() == '<'); let tag_name = self.parse_tag_name(); let attrs = self.parse_attributes(); - assert!(self.consume_char() == '>'); + assert!(self.consume() == '>'); + + if tag_name == "style" { + // TODO: make safe + let contents = self.consume_while(|c, s| c != '<' && + s.next() != '/'); + + assert!(self.consume() == '<'); + assert!(self.consume() == '/'); + assert!(self.parse_tag_name() == tag_name); + assert!(self.consume() == '>'); + + return dom::style(contents, attrs); + } - // Contents. let children = self.parse_nodes(); - // Closing tag. - assert!(self.consume_char() == '<'); - assert!(self.consume_char() == '/'); + assert!(self.consume() == '<'); + assert!(self.consume() == '/'); assert!(self.parse_tag_name() == tag_name); - assert!(self.consume_char() == '>'); + assert!(self.consume() == '>'); return dom::elem(tag_name, attrs, children); } fn parse_comment(&mut self) -> dom::Node { - assert!(self.consume_char() == '<'); - assert!(self.consume_char() == '!'); - assert!(self.consume_char() == '-'); - assert!(self.consume_char() == '-'); + assert!(self.consume() == '<'); + assert!(self.consume() == '!'); + assert!(self.consume() == '-'); + assert!(self.consume() == '-'); - let contents = consume_while(|c| c != '-' && - self.next() != '-' && - self.nth(1) != '>') + let contents = self.consume_while(|c, s| c != '-' && + s.next() != '-' && + s.nth(1) != '>'); - assert!(self.consume_char() == '-'); - assert!(self.consume_char() == '-'); - assert!(self.consume_char() == '>'); + assert!(self.consume() == '-'); + assert!(self.consume() == '-'); + assert!(self.consume() == '>'); return dom::comment(contents); } fn parse_attr(&mut self) -> (String, String) { let name = self.parse_tag_name(); - assert!(self.consume_char() == '='); + assert!(self.consume() == '='); let value = self.parse_attr_value(); return (name, value); } // Parse a quoted value. fn parse_attr_value(&mut self) -> String { - let open_quote = self.consume_char(); + let open_quote = self.consume(); assert!(open_quote == '"' || open_quote == '\''); - let value = self.consume_while(|c| c != open_quote); - assert!(self.consume_char() == open_quote); + let value = self.consume_while(|c, _| c != open_quote); + assert!(self.consume() == open_quote); return value; } // Parse a list of name="value" pairs, separated by whitespace. - fn parse_attributes(&mut self) -> dom::AttrMap { + fn parse_attributes(&mut self) -> dom::Attr { let mut attributes = HashMap::new(); loop { self.consume_ws(); @@ -126,7 +140,7 @@ impl Parser { let (name, value) = self.parse_attr(); attributes.insert(name, value); } - return attributes; + return dom::attr(attributes); } fn parse_nodes(&mut self) -> Vec { @@ -140,14 +154,14 @@ impl Parser { } return nodes; } +} - pub fn parse(source: String) -> dom::Node { - let mut nodes = Parser { pos: 0, input: source }.parse_nodes(); +pub fn parse(source: String) -> dom::Node { + let mut nodes = Parser { pos: 0, input: source }.parse_nodes(); - if nodes.len() == 1 { - nodes.swap_remove(0) - } else { - dom::elem("html".to_string(), HashMap::new(), nodes) - } + if nodes.len() == 1 { + nodes.swap_remove(0) + } else { + dom::elem("html".to_string(), dom::attr(HashMap::new()), nodes) } }