331 lines
11 KiB
Gleam
331 lines
11 KiB
Gleam
import gleam/bit_array
|
|
import gleam/float
|
|
import gleam/int
|
|
import gleam/list
|
|
import gleam/result
|
|
import gleam/string
|
|
|
|
/// Reasons parsing an S-expression document can fail.
pub type ParseError {
  /// The input ended where an expression or a closing `)` was still expected.
  UnexpectedEndOfFile
  /// A `(` was not immediately followed by a token name character
  /// (ASCII letter, digit or `_`).
  MissingTokenName
  /// A string literal was opened but the input ended before its closing `"`.
  /// `got` is the string content decoded up to that point.
  UnterminatedString(got: String)
  /// A complete top-level expression was parsed but non-whitespace input
  /// followed it. `got` is that remaining input.
  UnexpectedTrailingData(got: BitArray)
  /// The input could not be decoded as a UTF-8 codepoint. `got` holds the
  /// first four bytes of the offending input, or all of it when that slice
  /// cannot be taken.
  InvalidUtf8Character(got: BitArray)
}
|
|
|
|
/// A parsed S-expression node.
pub type SExpr {
  /// A parenthesised form: `(name attribute attribute ...)`.
  Token(name: String, attributes: List(SExpr))
  /// A double-quoted string literal, or a bare atom containing characters
  /// that rule it out as a number or a name.
  String(String)
  /// A bare atom accepted by `int.parse`.
  Int(Int)
  /// A bare atom accepted by `float.parse`.
  Float(Float)
  /// A bare atom made of ASCII letters, digits and `_` that is not a number,
  /// or a numeric-looking atom that failed to parse as one.
  Name(String)
}
|
|
|
|
/// Result of one parsing step: the parsed value paired with the input that
/// remains after it, or the error that stopped parsing.
type Parsed(a) =
  Result(#(a, BitArray), ParseError)
|
|
|
|
/// Renders an S-expression as an indented, type-annotated listing with one
/// node per line, starting at the left margin.
pub fn sexpr_to_pretty_string(sexpr: SExpr) -> String {
  let no_padding = ""
  sexpr |> do_sexpr_to_pretty_string(no_padding)
}
|
|
|
|
/// Renders `sexpr` on a line prefixed with `pad`. The attributes of a `Token`
/// are rendered on the following lines, one level deeper (`pad` plus one
/// extra space).
///
/// A `Token` without attributes renders as a single line with no trailing
/// newline, so it does not leave a blank line before its next sibling.
fn do_sexpr_to_pretty_string(sexpr: SExpr, pad: String) -> String {
  let rendered = case sexpr {
    // No children: emit only the header line.
    Token(name:, attributes: []) -> name <> " :: Token"
    Token(name:, attributes:) -> {
      let children =
        attributes
        |> list.map(do_sexpr_to_pretty_string(_, pad <> " "))
        |> string.join("\n")
      name <> " :: Token\n" <> children
    }
    String(value) -> "\"" <> value <> "\" :: String"
    Int(value) -> int.to_string(value) <> " :: Int"
    Float(value) -> float.to_string(value) <> " :: Float"
    Name(value) -> value <> " :: Name"
  }
  pad <> rendered
}
|
|
|
|
/// Parses `source` as exactly one S-expression.
///
/// Leading and trailing whitespace is ignored. Any other input left after
/// the expression is reported as `UnexpectedTrailingData`.
pub fn run(source: BitArray) -> Result(SExpr, ParseError) {
  source
  |> trim_start
  |> attribute
  |> result.try(fn(parsed) {
    let #(expr, remainder) = parsed
    case trim_start(remainder) {
      <<>> -> Ok(expr)
      trailing -> Error(UnexpectedTrailingData(trailing))
    }
  })
}
|
|
|
|
/// Drops leading ASCII whitespace bytes from `source`: space (32) and the
/// control characters 9 through 13 (tab, line feed, vertical tab, form feed,
/// carriage return).
fn trim_start(source: BitArray) -> BitArray {
  case source {
    <<32, rest:bits>> -> trim_start(rest)
    <<byte, rest:bits>> if byte >= 9 && byte <= 13 -> trim_start(rest)
    _ -> source
  }
}
|
|
|
|
// Builds a `UtfCodepoint` from an `Int` by binding directly to stdlib
// primitives: `identity` on Erlang and `codepoint` on JavaScript.
// NOTE(review): presumably this skips the validation performed by
// `string.utf_codepoint`. Callers in this module only pass ASCII values,
// which are always valid code points; confirm before passing anything else.
@external(erlang, "gleam_stdlib", "identity")
@external(javascript, "../gleam_stdlib.mjs", "codepoint")
fn utf_codepoint_unsafe(a: Int) -> UtfCodepoint
|
|
|
|
/// Reads a token name from the front of `source`: the longest run of ASCII
/// letters (`A`-`Z`, `a`-`z`), digits (`0`-`9`) and underscores.
///
/// `cps` accumulates the codepoints read so far in reverse order. Returns
/// the name and the unconsumed input, or `MissingTokenName` when no name
/// character was read.
fn do_token_name(source: BitArray, cps: List(UtfCodepoint)) -> Parsed(String) {
  case source {
    <<byte, rest:bits>>
      if byte >= 65 && byte <= 90
      || byte >= 97 && byte <= 122
      || byte >= 48 && byte <= 57
      || byte == 95
    -> do_token_name(rest, [utf_codepoint_unsafe(byte), ..cps])
    // First byte that is not part of a name (or end of input): stop here.
    _ ->
      case cps {
        [] -> Error(MissingTokenName)
        _ -> {
          let name = string.from_utf_codepoints(list.reverse(cps))
          Ok(#(name, source))
        }
      }
  }
}
|
|
|
|
/// Parses whitespace-separated attributes until the closing `)` of the
/// enclosing token.
///
/// `attrs` accumulates parsed attributes in reverse order. Returns them in
/// source order together with the input following the `)`. Running out of
/// input before the `)` yields `UnexpectedEndOfFile`.
fn do_attributes(source: BitArray, attrs: List(SExpr)) -> Parsed(List(SExpr)) {
  let trimmed = trim_start(source)
  case trimmed {
    <<>> -> Error(UnexpectedEndOfFile)
    // ')' closes the token.
    <<41, after_paren:bits>> -> Ok(#(list.reverse(attrs), after_paren))
    _ ->
      case attribute(trimmed) {
        Ok(#(attr, rest)) -> do_attributes(rest, [attr, ..attrs])
        Error(reason) -> Error(reason)
      }
  }
}
|
|
|
|
/// Parses a single expression from the front of `source`, dispatching on its
/// first byte: `(` starts a token, `"` starts a string literal, and anything
/// else is read as a bare number or name. Empty input is
/// `UnexpectedEndOfFile`.
fn attribute(source: BitArray) -> Parsed(SExpr) {
  case source {
    <<>> -> Error(UnexpectedEndOfFile)
    // '(' name attributes... ')'
    <<40, after_paren:bits>> ->
      case do_token_name(after_paren, []) {
        Error(reason) -> Error(reason)
        Ok(#(name, after_name)) ->
          do_attributes(after_name, [])
          |> result.map(fn(parsed) {
            let #(attributes, rest) = parsed
            #(Token(name:, attributes:), rest)
          })
      }
    // '"' string contents '"'
    <<34, after_quote:bits>> ->
      do_string(after_quote, [])
      |> result.map(fn(parsed) {
        let #(text, rest) = parsed
        #(String(text), rest)
      })
    _ -> do_name_number(source, #([], ParsedInt))
  }
}
|
|
|
|
/// Reads the body of a string literal whose opening `"` has already been
/// consumed, up to and including the closing `"`.
///
/// `acc` accumulates decoded codepoints in reverse order. Recognised
/// escapes are `\0`, `\a`, `\b`, `\t`, `\n`, `\v`, `\f` and `\r`. A backslash
/// before any other codepoint yields that codepoint itself, so `\"` and
/// `\\` work.
///
/// Errors: `UnterminatedString` when the input ends before the closing
/// quote, `InvalidUtf8Character` when the input is not valid UTF-8.
fn do_string(source: BitArray, acc: List(UtfCodepoint)) -> Parsed(String) {
  case source {
    <<>> -> {
      let partial = string.from_utf_codepoints(list.reverse(acc))
      Error(UnterminatedString(partial))
    }
    // Closing '"'.
    <<34, rest:bits>> -> {
      let text = string.from_utf_codepoints(list.reverse(acc))
      Ok(#(text, rest))
    }
    // Backslash: decode the escape that follows.
    <<92, escaped:bits>> ->
      case escaped {
        <<48, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(0), ..acc])
        <<97, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(7), ..acc])
        <<98, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(8), ..acc])
        <<116, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(9), ..acc])
        <<110, rest:bits>> ->
          do_string(rest, [utf_codepoint_unsafe(10), ..acc])
        <<118, rest:bits>> ->
          do_string(rest, [utf_codepoint_unsafe(11), ..acc])
        <<102, rest:bits>> ->
          do_string(rest, [utf_codepoint_unsafe(12), ..acc])
        <<114, rest:bits>> ->
          do_string(rest, [utf_codepoint_unsafe(13), ..acc])
        <<cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
        // Nothing decodable follows: keep the backslash as a literal
        // character and let the next step report end of input or the
        // invalid bytes.
        _ -> do_string(escaped, [utf_codepoint_unsafe(92), ..acc])
      }
    <<cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
    _ -> {
      let offending = source |> bit_array.slice(0, 4) |> result.unwrap(source)
      Error(InvalidUtf8Character(offending))
    }
  }
}
|
|
|
|
/// Running classification of a bare atom while `do_name_number` scans it.
type ParsedType {
  /// Only digits (and possibly a leading `-`) seen so far.
  ParsedInt
  /// A single `.` has been seen in what was otherwise an integer.
  ParsedFloat
  /// A letter or `_` has been seen.
  ParsedName
  /// Something was seen that rules out both numbers and names: a non-leading
  /// `-`, a `.` outside an integer, or any other codepoint.
  ParsedString
}
|
|
|
|
/// Scans a bare atom (number, name or unquoted string) from the front of
/// `source`.
///
/// `acc` carries the codepoints read so far (in reverse order) and the
/// running classification of the atom. Scanning stops at:
///
/// - ASCII whitespace, which is consumed;
/// - `"`, `(` or `)`, which are left in the returned remainder;
/// - the end of the input.
///
/// End of input finishes the atom like any other terminator. Previously it
/// fell through to the catch-all and was reported as
/// `InvalidUtf8Character(<<>>)`.
///
/// Returns `InvalidUtf8Character` when the input is not valid UTF-8.
fn do_name_number(
  source: BitArray,
  acc: #(List(UtfCodepoint), ParsedType),
) -> Parsed(SExpr) {
  let #(cps, parsed_type) = acc
  case source {
    // '-': only a leading minus keeps the current classification.
    <<45, rest:bits>> -> {
      let next_type = case cps {
        [] -> parsed_type
        _ -> ParsedString
      }
      do_name_number(rest, #([utf_codepoint_unsafe(45), ..cps], next_type))
    }
    // '.': turns an integer into a float; anywhere else the atom is a string.
    <<46, rest:bits>> -> {
      let next_type = case parsed_type {
        ParsedInt -> ParsedFloat
        ParsedFloat | ParsedName | ParsedString -> ParsedString
      }
      do_name_number(rest, #([utf_codepoint_unsafe(46), ..cps], next_type))
    }
    // '0'-'9': never changes the classification.
    <<byte, rest:bits>> if byte >= 48 && byte <= 57 ->
      do_name_number(rest, #([utf_codepoint_unsafe(byte), ..cps], parsed_type))
    // 'A'-'Z', 'a'-'z', '_': a numeric atom becomes a name.
    <<byte, rest:bits>>
      if byte >= 65 && byte <= 90 || byte >= 97 && byte <= 122 || byte == 95
    -> {
      let next_type = case parsed_type {
        ParsedInt | ParsedFloat | ParsedName -> ParsedName
        ParsedString -> ParsedString
      }
      do_name_number(rest, #([utf_codepoint_unsafe(byte), ..cps], next_type))
    }
    // Whitespace ends the atom and is consumed.
    <<32, rest:bits>>
    | <<9, rest:bits>>
    | <<10, rest:bits>>
    | <<11, rest:bits>>
    | <<12, rest:bits>>
    | <<13, rest:bits>> -> finish_name_number(cps, parsed_type, rest)
    // End of input, '"', '(' and ')' end the atom without being consumed.
    <<>> | <<34, _:bits>> | <<40, _:bits>> | <<41, _:bits>> ->
      finish_name_number(cps, parsed_type, source)
    // Any other codepoint makes the atom an unquoted string.
    <<cp:utf8_codepoint, rest:bits>> ->
      do_name_number(rest, #([cp, ..cps], ParsedString))
    _ ->
      Error(
        InvalidUtf8Character(
          source |> bit_array.slice(0, 4) |> result.unwrap(source),
        ),
      )
  }
}

/// Turns the reversed codepoints of a finished atom into an `SExpr`
/// according to its classification. Text that looks numeric but is rejected
/// by `int.parse` / `float.parse` becomes a `Name`.
fn finish_name_number(
  cps: List(UtfCodepoint),
  parsed_type: ParsedType,
  rest: BitArray,
) -> Parsed(SExpr) {
  let text = cps |> list.reverse |> string.from_utf_codepoints
  let expr = case parsed_type {
    ParsedInt ->
      case int.parse(text) {
        Ok(n) -> Int(n)
        Error(Nil) -> Name(text)
      }
    ParsedFloat ->
      case float.parse(text) {
        Ok(n) -> Float(n)
        Error(Nil) -> Name(text)
      }
    ParsedName -> Name(text)
    ParsedString -> String(text)
  }
  Ok(#(expr, rest))
}
|