diff --git a/src/kicad_sexpr.gleam b/src/kicad_sexpr.gleam
index 98a1120..be94e27 100644
--- a/src/kicad_sexpr.gleam
+++ b/src/kicad_sexpr.gleam
@@ -1,13 +1,11 @@
 import gleam/float
 import gleam/int
 import gleam/io
-
 import gleam/list
 import gleam/result
 import gleam/string
 import kicad_sexpr/decode
 import kicad_sexpr/parse
-
 import kicad_sexpr/token
 import simplifile
 
@@ -41,7 +39,7 @@ pub fn main() -> Nil {
   let #(successfully_read, _failed_to_read) =
     file_names
     |> list.map(fn(file_name) {
-      simplifile.read(file_name)
+      simplifile.read_bits(file_name)
       |> result.map(fn(res) { #(file_name, res) })
       |> result.map_error(fn(res) { #(file_name, res) })
     })
diff --git a/src/kicad_sexpr/parse.gleam b/src/kicad_sexpr/parse.gleam
index 3f475bf..18c598a 100644
--- a/src/kicad_sexpr/parse.gleam
+++ b/src/kicad_sexpr/parse.gleam
@@ -1,8 +1,6 @@
-import gleam/bool
 import gleam/float
 import gleam/int
 import gleam/list
-import gleam/option.{type Option, None, Some}
 import gleam/result
 import gleam/string
 
@@ -11,9 +9,10 @@ pub type ParseError {
   UnexpectedTokenCharacter(got: String, expected: String)
   UnexpectedNameCharacter(got: String)
   UnexpectedNumberCharacter(got: String)
-  UnexpectedTrailingString(got: String)
   UnterminatedString(got: String)
   InvalidNumber(got: String)
+  UnexpectedTrailingData(got: BitArray)
+  InvalidUtf8Character(got: BitArray)
 }
 
 pub type SExpr {
@@ -25,7 +24,7 @@ pub type SExpr {
 }
 
 pub type Parsed(a) =
-  Result(#(a, String), ParseError)
+  Result(#(a, BitArray), ParseError)
 
 pub fn sexpr_to_pretty_string(sexpr: SExpr) -> String {
   do_sexpr_to_pretty_string(sexpr, "")
@@ -47,170 +46,218 @@ fn do_sexpr_to_pretty_string(sexpr: SExpr, pad: String) -> String {
   }
 }
 
-pub fn run(source: String) -> Result(SExpr, ParseError) {
-  let source = string.trim(source)
+pub fn run(source: BitArray) -> Result(SExpr, ParseError) {
+  let source = trim_start(source)
   use #(token, rest) <- result.try(attribute(source))
-  case string.trim(rest) {
-    "" -> Ok(token)
-    rest -> Error(UnexpectedTrailingString(rest))
+  case trim_start(rest) {
+    <<>> -> Ok(token)
+    rest -> Error(UnexpectedTrailingData(rest))
   }
 }
 
-pub fn token(source: String) -> Parsed(SExpr) {
-  use #(name, rest) <- result.try(name(source))
-  use #(attributes, rest) <- result.try(attributes(rest))
-  Ok(#(Token(name:, attributes:), rest))
-}
-
-fn name_char(source: String) -> Parsed(String) {
+fn trim_start(source: BitArray) -> BitArray {
   case source {
-    "" -> Error(UnexpectedEndOfFile)
-    "a" <> rest -> Ok(#("a", rest))
-    "b" <> rest -> Ok(#("b", rest))
-    "c" <> rest -> Ok(#("c", rest))
-    "d" <> rest -> Ok(#("d", rest))
-    "e" <> rest -> Ok(#("e", rest))
-    "f" <> rest -> Ok(#("f", rest))
-    "g" <> rest -> Ok(#("g", rest))
-    "h" <> rest -> Ok(#("h", rest))
-    "i" <> rest -> Ok(#("i", rest))
-    "j" <> rest -> Ok(#("j", rest))
-    "k" <> rest -> Ok(#("k", rest))
-    "l" <> rest -> Ok(#("l", rest))
-    "m" <> rest -> Ok(#("m", rest))
-    "n" <> rest -> Ok(#("n", rest))
-    "o" <> rest -> Ok(#("o", rest))
-    "p" <> rest -> Ok(#("p", rest))
-    "q" <> rest -> Ok(#("q", rest))
-    "r" <> rest -> Ok(#("r", rest))
-    "s" <> rest -> Ok(#("s", rest))
-    "t" <> rest -> Ok(#("t", rest))
-    "u" <> rest -> Ok(#("u", rest))
-    "v" <> rest -> Ok(#("v", rest))
-    "w" <> rest -> Ok(#("w", rest))
-    "x" <> rest -> Ok(#("x", rest))
-    "y" <> rest -> Ok(#("y", rest))
-    "z" <> rest -> Ok(#("z", rest))
-    "_" <> rest -> Ok(#("_", rest))
-    _ ->
-      Error(UnexpectedNameCharacter(string.first(source) |> result.unwrap("")))
+    <<32, rest:bits>>
+    | <<9, rest:bits>>
+    | <<10, rest:bits>>
+    | <<11, rest:bits>>
+    | <<12, rest:bits>>
+    | <<13, rest:bits>> -> trim_start(rest)
+    _ -> source
   }
 }
 
-pub fn name(source: String) -> Parsed(String) {
-  use #(char, rest) <- result.try(name_char(source))
-  do_name(rest, char)
+@external(erlang, "gleam_stdlib", "identity")
+@external(javascript, "../gleam_stdlib.mjs", "codepoint")
+fn utf_codepoint_unsafe(a: Int) -> UtfCodepoint
+
+fn name_char(source: BitArray) -> Parsed(UtfCodepoint) {
+  case source {
+    <<>> -> Error(UnexpectedEndOfFile)
+    <<65 as i, rest:bits>>
+    | <<66 as i, rest:bits>>
+    | <<67 as i, rest:bits>>
+    | <<68 as i, rest:bits>>
+    | <<69 as i, rest:bits>>
+    | <<70 as i, rest:bits>>
+    | <<71 as i, rest:bits>>
+    | <<72 as i, rest:bits>>
+    | <<73 as i, rest:bits>>
+    | <<74 as i, rest:bits>>
+    | <<75 as i, rest:bits>>
+    | <<76 as i, rest:bits>>
+    | <<77 as i, rest:bits>>
+    | <<78 as i, rest:bits>>
+    | <<79 as i, rest:bits>>
+    | <<80 as i, rest:bits>>
+    | <<81 as i, rest:bits>>
+    | <<82 as i, rest:bits>>
+    | <<83 as i, rest:bits>>
+    | <<84 as i, rest:bits>>
+    | <<85 as i, rest:bits>>
+    | <<86 as i, rest:bits>>
+    | <<87 as i, rest:bits>>
+    | <<88 as i, rest:bits>>
+    | <<89 as i, rest:bits>>
+    | <<90 as i, rest:bits>>
+    | <<97 as i, rest:bits>>
+    | <<98 as i, rest:bits>>
+    | <<99 as i, rest:bits>>
+    | <<100 as i, rest:bits>>
+    | <<101 as i, rest:bits>>
+    | <<102 as i, rest:bits>>
+    | <<103 as i, rest:bits>>
+    | <<104 as i, rest:bits>>
+    | <<105 as i, rest:bits>>
+    | <<106 as i, rest:bits>>
+    | <<107 as i, rest:bits>>
+    | <<108 as i, rest:bits>>
+    | <<109 as i, rest:bits>>
+    | <<110 as i, rest:bits>>
+    | <<111 as i, rest:bits>>
+    | <<112 as i, rest:bits>>
+    | <<113 as i, rest:bits>>
+    | <<114 as i, rest:bits>>
+    | <<115 as i, rest:bits>>
+    | <<116 as i, rest:bits>>
+    | <<117 as i, rest:bits>>
+    | <<118 as i, rest:bits>>
+    | <<119 as i, rest:bits>>
+    | <<120 as i, rest:bits>>
+    | <<121 as i, rest:bits>>
+    | <<122 as i, rest:bits>>
+    | <<48 as i, rest:bits>>
+    | <<49 as i, rest:bits>>
+    | <<50 as i, rest:bits>>
+    | <<51 as i, rest:bits>>
+    | <<52 as i, rest:bits>>
+    | <<53 as i, rest:bits>>
+    | <<54 as i, rest:bits>>
+    | <<55 as i, rest:bits>>
+    | <<56 as i, rest:bits>>
+    | <<57 as i, rest:bits>>
+    | <<95 as i, rest:bits>>
+    | <<42 as i, rest:bits>>
+    | <<45 as i, rest:bits>>
+    | <<46 as i, rest:bits>> -> Ok(#(utf_codepoint_unsafe(i), rest))
+    <<cp:utf8_codepoint, _:bits>> ->
+      Error(UnexpectedNameCharacter(string.from_utf_codepoints([cp])))
+    source -> Error(InvalidUtf8Character(source))
+  }
 }
 
-fn do_name(source: String, result: String) -> Parsed(String) {
+fn do_name(source: BitArray, result: List(UtfCodepoint)) -> Parsed(String) {
   case name_char(source) {
-    Ok(#(char, rest)) -> do_name(rest, result <> char)
-    Error(_) -> Ok(#(result, source))
+    Ok(#(cp, rest)) -> do_name(rest, [cp, ..result])
+    Error(_) ->
+      Ok(#(result |> list.reverse |> string.from_utf_codepoints, source))
   }
 }
 
-pub fn attributes(source: String) -> Parsed(List(SExpr)) {
-  do_attributes(source, [])
-}
-
-fn do_attributes(source: String, attributes: List(SExpr)) -> Parsed(List(SExpr)) {
-  case string.trim_start(source) {
-    "" -> Error(UnexpectedEndOfFile)
-    ")" <> rest -> Ok(#(list.reverse(attributes), rest))
+fn do_attributes(source: BitArray, attrs: List(SExpr)) -> Parsed(List(SExpr)) {
+  case trim_start(source) {
+    <<>> -> Error(UnexpectedEndOfFile)
+    <<41, rest:bits>> -> Ok(#(attrs |> list.reverse, rest))
     source -> {
-      use #(attribute, rest) <- result.try(attribute(source))
-      do_attributes(rest, [attribute, ..attributes])
+      use #(attr, rest) <- result.try(attribute(source))
+      do_attributes(rest, [attr, ..attrs])
     }
   }
 }
 
-pub fn attribute(source: String) -> Parsed(SExpr) {
+fn attribute(source: BitArray) -> Parsed(SExpr) {
   case source {
-    "" -> Error(UnexpectedEndOfFile)
-    "(" <> rest -> {
-      use #(token, rest) <- result.try(token(rest))
-      Ok(#(token, rest))
+    <<>> -> Error(UnexpectedEndOfFile)
+    <<40, rest:bits>> -> {
+      use #(cp, rest) <- result.try(name_char(rest))
+      use #(name, rest) <- result.try(do_name(rest, [cp]))
+      use #(attributes, rest) <- result.try(do_attributes(rest, []))
+      Ok(#(Token(name:, attributes:), rest))
     }
-    "\"" <> rest -> {
-      use #(str, rest) <- result.try(string(rest))
+    <<34, rest:bits>> -> {
+      use #(str, rest) <- result.try(do_string(rest, []))
       Ok(#(String(str), rest))
     }
+    <<45 as i, rest:bits>>
+    | <<48 as i, rest:bits>>
+    | <<49 as i, rest:bits>>
+    | <<50 as i, rest:bits>>
+    | <<51 as i, rest:bits>>
+    | <<52 as i, rest:bits>>
+    | <<53 as i, rest:bits>>
+    | <<54 as i, rest:bits>>
+    | <<55 as i, rest:bits>>
+    | <<56 as i, rest:bits>>
+    | <<57 as i, rest:bits>> ->
+      do_number(rest, #([utf_codepoint_unsafe(i)], False))
     source -> {
-      use <- option.lazy_unwrap(try_number(source))
-      use #(name, rest) <- result.try(name(source))
+      use #(cp, rest) <- result.try(name_char(source))
+      use #(name, rest) <- result.try(do_name(rest, [cp]))
       Ok(#(Name(name), rest))
     }
   }
 }
 
-pub fn string(source: String) -> Parsed(String) {
-  use <- bool.guard(source == "", Error(UnexpectedEndOfFile))
-  do_string(source, "")
-}
-
-fn do_string(source: String, result: String) -> Parsed(String) {
-  case string.split_once(source, "\"") {
-    Ok(#(start, rest)) ->
-      case string.last(start) {
-        Ok("\\") -> do_string(rest, result <> "\"" <> start)
-        _ ->
-          Ok(#(
-            result <> start,
-            // |> string.replace("\\n", "\n")
-            // |> string.replace("\\r", "\r")
-            // |> string.replace("\\t", "\t")
-            // |> string.replace("\\f", "\f")
-            rest,
-          ))
-      }
-    Error(Nil) -> Error(UnterminatedString(source))
-  }
-}
-
-fn number_char(source: String) -> Parsed(String) {
+fn do_string(source: BitArray, acc: List(UtfCodepoint)) -> Parsed(String) {
   case source {
-    "" -> Error(UnexpectedEndOfFile)
-    "." <> rest -> Ok(#(".", rest))
-    "-" <> rest -> Ok(#("-", rest))
-    "0" <> rest -> Ok(#("0", rest))
-    "1" <> rest -> Ok(#("1", rest))
-    "2" <> rest -> Ok(#("2", rest))
-    "3" <> rest -> Ok(#("3", rest))
-    "4" <> rest -> Ok(#("4", rest))
-    "5" <> rest -> Ok(#("5", rest))
-    "6" <> rest -> Ok(#("6", rest))
-    "7" <> rest -> Ok(#("7", rest))
-    "8" <> rest -> Ok(#("8", rest))
-    "9" <> rest -> Ok(#("9", rest))
-    _ ->
-      Error(UnexpectedNumberCharacter(string.first(source) |> result.unwrap("")))
+    <<>> ->
+      Error(UnterminatedString(
+        acc |> list.reverse |> string.from_utf_codepoints,
+      ))
+    <<34, rest:bits>> ->
+      Ok(#(acc |> list.reverse |> string.from_utf_codepoints, rest))
+    <<92, 48, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(0), ..acc])
+    <<92, 97, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(7), ..acc])
+    <<92, 98, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(8), ..acc])
+    <<92, 116, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(9), ..acc])
+    <<92, 110, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(10), ..acc])
+    <<92, 118, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(11), ..acc])
+    <<92, 102, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(12), ..acc])
+    <<92, 114, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(13), ..acc])
+    <<92, cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
+    <<cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
+    source -> Error(InvalidUtf8Character(source))
   }
 }
 
-pub fn try_number(source: String) -> Option(Parsed(SExpr)) {
-  case number_char(source) {
-    Ok(#(char, rest)) -> Some(do_number(rest, char))
-    Error(_) -> None
-  }
-}
-
-fn do_number(source: String, result: String) -> Parsed(SExpr) {
-  case number_char(source) {
-    Ok(#(char, rest)) -> do_number(rest, result <> char)
-    Error(_) -> {
-      case int.parse(result) {
-        Ok(n) -> Ok(#(Int(n), source))
-        Error(Nil) -> {
-          let result = case result {
-            "-." <> rest -> "-0." <> rest
-            "." <> rest -> "0." <> rest
-            result -> result
-          }
-          case float.parse(result) {
+fn do_number(
+  source: BitArray,
+  acc: #(List(UtfCodepoint), Bool),
+) -> Parsed(SExpr) {
+  case source, acc {
+    <<>>, _ -> Error(UnexpectedEndOfFile)
+    <<46 as i, _:bits>>, #(cps, True) ->
+      Error(InvalidNumber(
+        [utf_codepoint_unsafe(i), ..cps]
+        |> list.reverse
+        |> string.from_utf_codepoints,
+      ))
+    <<46 as i, rest:bits>>, #(cps, False) ->
+      do_number(rest, #([utf_codepoint_unsafe(i), ..cps], True))
+    <<48 as i, rest:bits>>, #(cps, has_decimal)
+    | <<49 as i, rest:bits>>, #(cps, has_decimal)
+    | <<50 as i, rest:bits>>, #(cps, has_decimal)
+    | <<51 as i, rest:bits>>, #(cps, has_decimal)
+    | <<52 as i, rest:bits>>, #(cps, has_decimal)
+    | <<53 as i, rest:bits>>, #(cps, has_decimal)
+    | <<54 as i, rest:bits>>, #(cps, has_decimal)
+    | <<55 as i, rest:bits>>, #(cps, has_decimal)
+    | <<56 as i, rest:bits>>, #(cps, has_decimal)
+    | <<57 as i, rest:bits>>, #(cps, has_decimal)
+    -> do_number(rest, #([utf_codepoint_unsafe(i), ..cps], has_decimal))
+    source, #(cps, has_decimal) -> {
+      let str = cps |> list.reverse |> string.from_utf_codepoints
+      case has_decimal {
+        True ->
+          case float.parse(str) {
             Ok(n) -> Ok(#(Float(n), source))
-            Error(Nil) -> Error(InvalidNumber(result))
+            Error(Nil) -> Error(InvalidNumber(str))
+          }
+        False ->
+          case int.parse(str) {
+            Ok(n) -> Ok(#(Int(n), source))
+            Error(Nil) -> Error(InvalidNumber(str))
           }
-        }
       }
     }
   }