import gleam/bit_array
import gleam/float
import gleam/int
import gleam/list
import gleam/result
import gleam/string

/// Reasons `run` can fail.
pub type ParseError {
  /// Input ended where more data was required (empty input, or a token that
  /// is missing its closing `)`).
  UnexpectedEndOfFile
  /// A `(` was not followed by at least one `[A-Za-z0-9_]` character.
  MissingTokenName
  /// A quoted string was not closed; `got` is the text read so far.
  UnterminatedString(got: String)
  /// Non-whitespace data remained after the top-level expression.
  UnexpectedTrailingData(got: BitArray)
  /// The input could not be decoded as a UTF-8 codepoint; `got` holds up to
  /// the first four offending bytes (or the whole remainder when shorter).
  InvalidUtf8Character(got: BitArray)
}

/// A parsed S-expression node.
pub type SExpr {
  /// `(name attr attr ...)`
  Token(name: String, attributes: List(SExpr))
  /// A quoted string, or a bare atom that is neither a number nor a name.
  String(String)
  Int(Int)
  Float(Float)
  /// A bare atom that contains letters/underscores, or a numeric-looking atom
  /// that failed to parse as a number.
  Name(String)
}

// Result of a parsing step: the parsed value plus the unconsumed input.
type Parsed(a) =
  Result(#(a, BitArray), ParseError)

/// Renders an expression as an indented, type-annotated tree, one node per
/// line.
pub fn sexpr_to_pretty_string(sexpr: SExpr) -> String {
  do_sexpr_to_pretty_string(sexpr, "")
}

// Renders `sexpr` prefixed by `pad`; attributes of a token are rendered on
// following lines with one extra space of padding. A token without attributes
// still ends with a newline after " :: Token".
fn do_sexpr_to_pretty_string(sexpr: SExpr, pad: String) -> String {
  let body = case sexpr {
    Token(name:, attributes:) -> {
      let children =
        attributes
        |> list.map(do_sexpr_to_pretty_string(_, pad <> " "))
        |> string.join("\n")
      name <> " :: Token\n" <> children
    }
    String(value) -> "\"" <> value <> "\" :: String"
    Int(value) -> int.to_string(value) <> " :: Int"
    Float(value) -> float.to_string(value) <> " :: Float"
    Name(value) -> value <> " :: Name"
  }
  pad <> body
}

/// Parses exactly one expression from `source`.
///
/// Leading and trailing ASCII whitespace is ignored. Anything else after the
/// expression yields `UnexpectedTrailingData`.
pub fn run(source: BitArray) -> Result(SExpr, ParseError) {
  use #(expr, rest) <- result.try(attribute(trim_start(source)))
  case trim_start(rest) {
    <<>> -> Ok(expr)
    trailing -> Error(UnexpectedTrailingData(trailing))
  }
}

// Drops leading ASCII whitespace bytes: space (32) and the range 9..13
// (tab, line feed, vertical tab, form feed, carriage return).
fn trim_start(source: BitArray) -> BitArray {
  case source {
    <<byte, rest:bits>> if byte == 32 || byte >= 9 && byte <= 13 ->
      trim_start(rest)
    _ -> source
  }
}

// Builds a codepoint from an integer without going through a `Result`.
// On Erlang this is the identity function, so the value is not checked;
// callers in this module only pass ASCII values and small control codes.
@external(erlang, "gleam_stdlib", "identity")
@external(javascript, "../gleam_stdlib.mjs", "codepoint")
fn utf_codepoint_unsafe(a: Int) -> UtfCodepoint

// Reads the longest run of `[A-Za-z0-9_]` bytes as a token name.
// `cps` accumulates the codepoints in reverse order.
// Fails with `MissingTokenName` when no such byte is present.
fn do_token_name(source: BitArray, cps: List(UtfCodepoint)) -> Parsed(String) {
  case source {
    <<i, rest:bits>>
      if i >= 65 && i <= 90
      || i >= 97 && i <= 122
      || i >= 48 && i <= 57
      || i == 95
    -> do_token_name(rest, [utf_codepoint_unsafe(i), ..cps])
    _ ->
      case cps {
        [] -> Error(MissingTokenName)
        _ -> Ok(#(cps |> list.reverse |> string.from_utf_codepoints, source))
      }
  }
}

// Reads whitespace-separated attributes until the closing `)` (byte 41),
// which is consumed. `attrs` accumulates the attributes in reverse order.
fn do_attributes(source: BitArray, attrs: List(SExpr)) -> Parsed(List(SExpr)) {
  case trim_start(source) {
    <<>> -> Error(UnexpectedEndOfFile)
    <<41, rest:bits>> -> Ok(#(list.reverse(attrs), rest))
    remaining -> {
      use #(attr, rest) <- result.try(attribute(remaining))
      do_attributes(rest, [attr, ..attrs])
    }
  }
}

// Parses a single expression: a `(token ...)`, a quoted string, or a bare
// atom (number / name). Expects leading whitespace to be already removed.
fn attribute(source: BitArray) -> Parsed(SExpr) {
  case source {
    <<>> -> Error(UnexpectedEndOfFile)
    // '('
    <<40, rest:bits>> -> {
      use #(name, rest) <- result.try(do_token_name(rest, []))
      use #(attributes, rest) <- result.try(do_attributes(rest, []))
      Ok(#(Token(name:, attributes:), rest))
    }
    // '"'
    <<34, rest:bits>> -> {
      use #(str, rest) <- result.try(do_string(rest, []))
      Ok(#(String(str), rest))
    }
    _ -> do_name_number(source, #([], ParsedInt))
  }
}

// Reads the body of a quoted string; the opening quote has already been
// consumed and the closing quote is consumed here. `acc` accumulates the
// codepoints in reverse order.
//
// Escapes (after a backslash, byte 92): `0`→0, `a`→7, `b`→8, `t`→9, `n`→10,
// `v`→11, `f`→12, `r`→13; any other escaped codepoint stands for itself.
fn do_string(source: BitArray, acc: List(UtfCodepoint)) -> Parsed(String) {
  case source {
    <<>> ->
      Error(
        UnterminatedString(acc |> list.reverse |> string.from_utf_codepoints),
      )
    <<34, rest:bits>> ->
      Ok(#(acc |> list.reverse |> string.from_utf_codepoints, rest))
    <<92, 48, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(0), ..acc])
    <<92, 97, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(7), ..acc])
    <<92, 98, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(8), ..acc])
    <<92, 116, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(9), ..acc])
    <<92, 110, rest:bits>> ->
      do_string(rest, [utf_codepoint_unsafe(10), ..acc])
    <<92, 118, rest:bits>> ->
      do_string(rest, [utf_codepoint_unsafe(11), ..acc])
    <<92, 102, rest:bits>> ->
      do_string(rest, [utf_codepoint_unsafe(12), ..acc])
    <<92, 114, rest:bits>> ->
      do_string(rest, [utf_codepoint_unsafe(13), ..acc])
    <<92, cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
    // Any other valid UTF-8 codepoint is taken literally.
    <<cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
    _ ->
      Error(
        InvalidUtf8Character(
          source |> bit_array.slice(0, 4) |> result.unwrap(source),
        ),
      )
  }
}

// Running classification of a bare atom while it is being scanned.
type ParsedType {
  ParsedInt
  ParsedFloat
  ParsedName
  ParsedString
}

// Turns the accumulated (reversed) codepoints of a bare atom into an `SExpr`
// according to its classification. Numeric-looking atoms that fail to parse
// (e.g. "-" or "1.") fall back to `Name`.
fn finish_atom(
  cps: List(UtfCodepoint),
  parsed_type: ParsedType,
  rest: BitArray,
) -> Parsed(SExpr) {
  let str = cps |> list.reverse |> string.from_utf_codepoints
  let sexpr = case parsed_type {
    ParsedInt ->
      case int.parse(str) {
        Ok(n) -> Int(n)
        Error(Nil) -> Name(str)
      }
    ParsedFloat ->
      case float.parse(str) {
        Ok(n) -> Float(n)
        Error(Nil) -> Name(str)
      }
    ParsedName -> Name(str)
    ParsedString -> String(str)
  }
  Ok(#(sexpr, rest))
}

// Scans a bare (unquoted) atom and classifies it as it goes.
//
// `acc` is the reversed list of codepoints read so far together with the
// current classification (callers start with `#([], ParsedInt)`):
// - `-` keeps the classification only as the first character, otherwise the
//   atom becomes a `ParsedString`;
// - `.` turns `ParsedInt` into `ParsedFloat`, anything else into
//   `ParsedString`;
// - digits keep the classification;
// - letters and `_` turn a numeric classification into `ParsedName`;
// - any other UTF-8 codepoint makes the atom a `ParsedString`.
//
// The atom ends at ASCII whitespace (one whitespace byte is consumed), at
// `"`, `(` or `)` (left in the input), or at the end of the input.
fn do_name_number(
  source: BitArray,
  acc: #(List(UtfCodepoint), ParsedType),
) -> Parsed(SExpr) {
  let #(cps, parsed_type) = acc
  case source {
    // '-'
    <<45, rest:bits>> -> {
      let next_type = case cps {
        [] -> parsed_type
        _ -> ParsedString
      }
      do_name_number(rest, #([utf_codepoint_unsafe(45), ..cps], next_type))
    }
    // '.'
    <<46, rest:bits>> -> {
      let next_type = case parsed_type {
        ParsedInt -> ParsedFloat
        _ -> ParsedString
      }
      do_name_number(rest, #([utf_codepoint_unsafe(46), ..cps], next_type))
    }
    // '0'..'9'
    <<i, rest:bits>> if i >= 48 && i <= 57 ->
      do_name_number(rest, #([utf_codepoint_unsafe(i), ..cps], parsed_type))
    // 'A'..'Z', 'a'..'z', '_'
    <<i, rest:bits>> if i >= 65 && i <= 90 || i >= 97 && i <= 122 || i == 95 -> {
      let next_type = case parsed_type {
        ParsedInt | ParsedFloat -> ParsedName
        other -> other
      }
      do_name_number(rest, #([utf_codepoint_unsafe(i), ..cps], next_type))
    }
    // ASCII whitespace terminates the atom and is consumed.
    <<i, rest:bits>> if i == 32 || i >= 9 && i <= 13 ->
      finish_atom(cps, parsed_type, rest)
    // '"', '(' and ')' terminate the atom but stay in the input. End of input
    // also terminates it, so the caller decides whether running out of data
    // is an error (e.g. a missing ')') instead of it being reported as an
    // invalid UTF-8 character.
    <<>> | <<34, _:bits>> | <<40, _:bits>> | <<41, _:bits>> ->
      finish_atom(cps, parsed_type, source)
    // Any other valid UTF-8 codepoint: the atom can only be a string.
    <<cp:utf8_codepoint, rest:bits>> ->
      do_name_number(rest, #([cp, ..cps], ParsedString))
    _ ->
      Error(
        InvalidUtf8Character(
          source |> bit_array.slice(0, 4) |> result.unwrap(source),
        ),
      )
  }
}