Improve parsing speed by switching to BitArray
Some checks are pending
test / test (push) Waiting to run
Some checks are pending
test / test (push) Waiting to run
This commit is contained in:
parent
e5d0963c5f
commit
fc267c55e9
2 changed files with 181 additions and 136 deletions
|
@ -1,13 +1,11 @@
|
|||
import gleam/float
|
||||
import gleam/int
|
||||
import gleam/io
|
||||
|
||||
import gleam/list
|
||||
import gleam/result
|
||||
import gleam/string
|
||||
import kicad_sexpr/decode
|
||||
import kicad_sexpr/parse
|
||||
|
||||
import kicad_sexpr/token
|
||||
import simplifile
|
||||
|
||||
|
@ -41,7 +39,7 @@ pub fn main() -> Nil {
|
|||
let #(successfully_read, _failed_to_read) =
|
||||
file_names
|
||||
|> list.map(fn(file_name) {
|
||||
simplifile.read(file_name)
|
||||
simplifile.read_bits(file_name)
|
||||
|> result.map(fn(res) { #(file_name, res) })
|
||||
|> result.map_error(fn(res) { #(file_name, res) })
|
||||
})
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
import gleam/bool
|
||||
import gleam/float
|
||||
import gleam/int
|
||||
import gleam/list
|
||||
import gleam/option.{type Option, None, Some}
|
||||
import gleam/result
|
||||
import gleam/string
|
||||
|
||||
|
@ -11,9 +9,10 @@ pub type ParseError {
|
|||
UnexpectedTokenCharacter(got: String, expected: String)
|
||||
UnexpectedNameCharacter(got: String)
|
||||
UnexpectedNumberCharacter(got: String)
|
||||
UnexpectedTrailingString(got: String)
|
||||
UnterminatedString(got: String)
|
||||
InvalidNumber(got: String)
|
||||
UnexpectedTrailingData(got: BitArray)
|
||||
InvalidUtf8Character(got: BitArray)
|
||||
}
|
||||
|
||||
pub type SExpr {
|
||||
|
@ -25,7 +24,7 @@ pub type SExpr {
|
|||
}
|
||||
|
||||
pub type Parsed(a) =
|
||||
Result(#(a, String), ParseError)
|
||||
Result(#(a, BitArray), ParseError)
|
||||
|
||||
pub fn sexpr_to_pretty_string(sexpr: SExpr) -> String {
|
||||
do_sexpr_to_pretty_string(sexpr, "")
|
||||
|
@ -47,170 +46,218 @@ fn do_sexpr_to_pretty_string(sexpr: SExpr, pad: String) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn run(source: String) -> Result(SExpr, ParseError) {
|
||||
let source = string.trim(source)
|
||||
pub fn run(source: BitArray) -> Result(SExpr, ParseError) {
|
||||
let source = trim_start(source)
|
||||
use #(token, rest) <- result.try(attribute(source))
|
||||
case string.trim(rest) {
|
||||
"" -> Ok(token)
|
||||
rest -> Error(UnexpectedTrailingString(rest))
|
||||
case trim_start(rest) {
|
||||
<<>> -> Ok(token)
|
||||
rest -> Error(UnexpectedTrailingData(rest))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn token(source: String) -> Parsed(SExpr) {
|
||||
use #(name, rest) <- result.try(name(source))
|
||||
use #(attributes, rest) <- result.try(attributes(rest))
|
||||
Ok(#(Token(name:, attributes:), rest))
|
||||
}
|
||||
|
||||
fn name_char(source: String) -> Parsed(String) {
|
||||
fn trim_start(source: BitArray) -> BitArray {
|
||||
case source {
|
||||
"" -> Error(UnexpectedEndOfFile)
|
||||
"a" <> rest -> Ok(#("a", rest))
|
||||
"b" <> rest -> Ok(#("b", rest))
|
||||
"c" <> rest -> Ok(#("c", rest))
|
||||
"d" <> rest -> Ok(#("d", rest))
|
||||
"e" <> rest -> Ok(#("e", rest))
|
||||
"f" <> rest -> Ok(#("f", rest))
|
||||
"g" <> rest -> Ok(#("g", rest))
|
||||
"h" <> rest -> Ok(#("h", rest))
|
||||
"i" <> rest -> Ok(#("i", rest))
|
||||
"j" <> rest -> Ok(#("j", rest))
|
||||
"k" <> rest -> Ok(#("k", rest))
|
||||
"l" <> rest -> Ok(#("l", rest))
|
||||
"m" <> rest -> Ok(#("m", rest))
|
||||
"n" <> rest -> Ok(#("n", rest))
|
||||
"o" <> rest -> Ok(#("o", rest))
|
||||
"p" <> rest -> Ok(#("p", rest))
|
||||
"q" <> rest -> Ok(#("q", rest))
|
||||
"r" <> rest -> Ok(#("r", rest))
|
||||
"s" <> rest -> Ok(#("s", rest))
|
||||
"t" <> rest -> Ok(#("t", rest))
|
||||
"u" <> rest -> Ok(#("u", rest))
|
||||
"v" <> rest -> Ok(#("v", rest))
|
||||
"w" <> rest -> Ok(#("w", rest))
|
||||
"x" <> rest -> Ok(#("x", rest))
|
||||
"y" <> rest -> Ok(#("y", rest))
|
||||
"z" <> rest -> Ok(#("z", rest))
|
||||
"_" <> rest -> Ok(#("_", rest))
|
||||
_ ->
|
||||
Error(UnexpectedNameCharacter(string.first(source) |> result.unwrap("")))
|
||||
<<32, rest:bits>>
|
||||
| <<9, rest:bits>>
|
||||
| <<10, rest:bits>>
|
||||
| <<11, rest:bits>>
|
||||
| <<12, rest:bits>>
|
||||
| <<13, rest:bits>> -> trim_start(rest)
|
||||
_ -> source
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(source: String) -> Parsed(String) {
|
||||
use #(char, rest) <- result.try(name_char(source))
|
||||
do_name(rest, char)
|
||||
@external(erlang, "gleam_stdlib", "identity")
|
||||
@external(javascript, "../gleam_stdlib.mjs", "codepoint")
|
||||
fn utf_codepoint_unsafe(a: Int) -> UtfCodepoint
|
||||
|
||||
fn name_char(source: BitArray) -> Parsed(UtfCodepoint) {
|
||||
case source {
|
||||
<<>> -> Error(UnexpectedEndOfFile)
|
||||
<<65 as i, rest:bits>>
|
||||
| <<66 as i, rest:bits>>
|
||||
| <<67 as i, rest:bits>>
|
||||
| <<68 as i, rest:bits>>
|
||||
| <<69 as i, rest:bits>>
|
||||
| <<70 as i, rest:bits>>
|
||||
| <<71 as i, rest:bits>>
|
||||
| <<72 as i, rest:bits>>
|
||||
| <<73 as i, rest:bits>>
|
||||
| <<74 as i, rest:bits>>
|
||||
| <<75 as i, rest:bits>>
|
||||
| <<76 as i, rest:bits>>
|
||||
| <<77 as i, rest:bits>>
|
||||
| <<78 as i, rest:bits>>
|
||||
| <<79 as i, rest:bits>>
|
||||
| <<80 as i, rest:bits>>
|
||||
| <<81 as i, rest:bits>>
|
||||
| <<82 as i, rest:bits>>
|
||||
| <<83 as i, rest:bits>>
|
||||
| <<84 as i, rest:bits>>
|
||||
| <<85 as i, rest:bits>>
|
||||
| <<86 as i, rest:bits>>
|
||||
| <<87 as i, rest:bits>>
|
||||
| <<88 as i, rest:bits>>
|
||||
| <<89 as i, rest:bits>>
|
||||
| <<90 as i, rest:bits>>
|
||||
| <<97 as i, rest:bits>>
|
||||
| <<98 as i, rest:bits>>
|
||||
| <<99 as i, rest:bits>>
|
||||
| <<100 as i, rest:bits>>
|
||||
| <<101 as i, rest:bits>>
|
||||
| <<102 as i, rest:bits>>
|
||||
| <<103 as i, rest:bits>>
|
||||
| <<104 as i, rest:bits>>
|
||||
| <<105 as i, rest:bits>>
|
||||
| <<106 as i, rest:bits>>
|
||||
| <<107 as i, rest:bits>>
|
||||
| <<108 as i, rest:bits>>
|
||||
| <<109 as i, rest:bits>>
|
||||
| <<110 as i, rest:bits>>
|
||||
| <<111 as i, rest:bits>>
|
||||
| <<112 as i, rest:bits>>
|
||||
| <<113 as i, rest:bits>>
|
||||
| <<114 as i, rest:bits>>
|
||||
| <<115 as i, rest:bits>>
|
||||
| <<116 as i, rest:bits>>
|
||||
| <<117 as i, rest:bits>>
|
||||
| <<118 as i, rest:bits>>
|
||||
| <<119 as i, rest:bits>>
|
||||
| <<120 as i, rest:bits>>
|
||||
| <<121 as i, rest:bits>>
|
||||
| <<122 as i, rest:bits>>
|
||||
| <<48 as i, rest:bits>>
|
||||
| <<49 as i, rest:bits>>
|
||||
| <<50 as i, rest:bits>>
|
||||
| <<51 as i, rest:bits>>
|
||||
| <<52 as i, rest:bits>>
|
||||
| <<53 as i, rest:bits>>
|
||||
| <<54 as i, rest:bits>>
|
||||
| <<55 as i, rest:bits>>
|
||||
| <<56 as i, rest:bits>>
|
||||
| <<57 as i, rest:bits>>
|
||||
| <<95 as i, rest:bits>>
|
||||
| <<42 as i, rest:bits>>
|
||||
| <<45 as i, rest:bits>>
|
||||
| <<46 as i, rest:bits>> -> Ok(#(utf_codepoint_unsafe(i), rest))
|
||||
<<cp:utf8_codepoint, _:bits>> ->
|
||||
Error(UnexpectedNameCharacter(string.from_utf_codepoints([cp])))
|
||||
source -> Error(InvalidUtf8Character(source))
|
||||
}
|
||||
}
|
||||
|
||||
fn do_name(source: String, result: String) -> Parsed(String) {
|
||||
fn do_name(source: BitArray, result: List(UtfCodepoint)) -> Parsed(String) {
|
||||
case name_char(source) {
|
||||
Ok(#(char, rest)) -> do_name(rest, result <> char)
|
||||
Error(_) -> Ok(#(result, source))
|
||||
Ok(#(cp, rest)) -> do_name(rest, [cp, ..result])
|
||||
Error(_) ->
|
||||
Ok(#(result |> list.reverse |> string.from_utf_codepoints, source))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attributes(source: String) -> Parsed(List(SExpr)) {
|
||||
do_attributes(source, [])
|
||||
}
|
||||
|
||||
fn do_attributes(source: String, attributes: List(SExpr)) -> Parsed(List(SExpr)) {
|
||||
case string.trim_start(source) {
|
||||
"" -> Error(UnexpectedEndOfFile)
|
||||
")" <> rest -> Ok(#(list.reverse(attributes), rest))
|
||||
fn do_attributes(source: BitArray, attrs: List(SExpr)) -> Parsed(List(SExpr)) {
|
||||
case trim_start(source) {
|
||||
<<>> -> Error(UnexpectedEndOfFile)
|
||||
<<41, rest:bits>> -> Ok(#(attrs |> list.reverse, rest))
|
||||
source -> {
|
||||
use #(attribute, rest) <- result.try(attribute(source))
|
||||
do_attributes(rest, [attribute, ..attributes])
|
||||
use #(attr, rest) <- result.try(attribute(source))
|
||||
do_attributes(rest, [attr, ..attrs])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attribute(source: String) -> Parsed(SExpr) {
|
||||
fn attribute(source: BitArray) -> Parsed(SExpr) {
|
||||
case source {
|
||||
"" -> Error(UnexpectedEndOfFile)
|
||||
"(" <> rest -> {
|
||||
use #(token, rest) <- result.try(token(rest))
|
||||
Ok(#(token, rest))
|
||||
<<>> -> Error(UnexpectedEndOfFile)
|
||||
<<40, rest:bits>> -> {
|
||||
use #(cp, rest) <- result.try(name_char(rest))
|
||||
use #(name, rest) <- result.try(do_name(rest, [cp]))
|
||||
use #(attributes, rest) <- result.try(do_attributes(rest, []))
|
||||
Ok(#(Token(name:, attributes:), rest))
|
||||
}
|
||||
"\"" <> rest -> {
|
||||
use #(str, rest) <- result.try(string(rest))
|
||||
<<34, rest:bits>> -> {
|
||||
use #(str, rest) <- result.try(do_string(rest, []))
|
||||
Ok(#(String(str), rest))
|
||||
}
|
||||
<<45 as i, rest:bits>>
|
||||
| <<48 as i, rest:bits>>
|
||||
| <<49 as i, rest:bits>>
|
||||
| <<50 as i, rest:bits>>
|
||||
| <<51 as i, rest:bits>>
|
||||
| <<52 as i, rest:bits>>
|
||||
| <<53 as i, rest:bits>>
|
||||
| <<54 as i, rest:bits>>
|
||||
| <<55 as i, rest:bits>>
|
||||
| <<56 as i, rest:bits>>
|
||||
| <<57 as i, rest:bits>> ->
|
||||
do_number(rest, #([utf_codepoint_unsafe(i)], False))
|
||||
source -> {
|
||||
use <- option.lazy_unwrap(try_number(source))
|
||||
use #(name, rest) <- result.try(name(source))
|
||||
use #(cp, rest) <- result.try(name_char(source))
|
||||
use #(name, rest) <- result.try(do_name(rest, [cp]))
|
||||
Ok(#(Name(name), rest))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn string(source: String) -> Parsed(String) {
|
||||
use <- bool.guard(source == "", Error(UnexpectedEndOfFile))
|
||||
do_string(source, "")
|
||||
}
|
||||
|
||||
fn do_string(source: String, result: String) -> Parsed(String) {
|
||||
case string.split_once(source, "\"") {
|
||||
Ok(#(start, rest)) ->
|
||||
case string.last(start) {
|
||||
Ok("\\") -> do_string(rest, result <> "\"" <> start)
|
||||
_ ->
|
||||
Ok(#(
|
||||
result <> start,
|
||||
// |> string.replace("\\n", "\n")
|
||||
// |> string.replace("\\r", "\r")
|
||||
// |> string.replace("\\t", "\t")
|
||||
// |> string.replace("\\f", "\f")
|
||||
rest,
|
||||
))
|
||||
}
|
||||
Error(Nil) -> Error(UnterminatedString(source))
|
||||
}
|
||||
}
|
||||
|
||||
fn number_char(source: String) -> Parsed(String) {
|
||||
fn do_string(source: BitArray, acc: List(UtfCodepoint)) -> Parsed(String) {
|
||||
case source {
|
||||
"" -> Error(UnexpectedEndOfFile)
|
||||
"." <> rest -> Ok(#(".", rest))
|
||||
"-" <> rest -> Ok(#("-", rest))
|
||||
"0" <> rest -> Ok(#("0", rest))
|
||||
"1" <> rest -> Ok(#("1", rest))
|
||||
"2" <> rest -> Ok(#("2", rest))
|
||||
"3" <> rest -> Ok(#("3", rest))
|
||||
"4" <> rest -> Ok(#("4", rest))
|
||||
"5" <> rest -> Ok(#("5", rest))
|
||||
"6" <> rest -> Ok(#("6", rest))
|
||||
"7" <> rest -> Ok(#("7", rest))
|
||||
"8" <> rest -> Ok(#("8", rest))
|
||||
"9" <> rest -> Ok(#("9", rest))
|
||||
_ ->
|
||||
Error(UnexpectedNumberCharacter(string.first(source) |> result.unwrap("")))
|
||||
<<>> ->
|
||||
Error(UnterminatedString(
|
||||
acc |> list.reverse |> string.from_utf_codepoints,
|
||||
))
|
||||
<<34, rest:bits>> ->
|
||||
Ok(#(acc |> list.reverse |> string.from_utf_codepoints, rest))
|
||||
<<92, 48, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(0), ..acc])
|
||||
<<92, 97, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(7), ..acc])
|
||||
<<92, 98, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(8), ..acc])
|
||||
<<92, 116, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(9), ..acc])
|
||||
<<92, 110, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(10), ..acc])
|
||||
<<92, 118, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(11), ..acc])
|
||||
<<92, 102, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(12), ..acc])
|
||||
<<92, 114, rest:bits>> -> do_string(rest, [utf_codepoint_unsafe(13), ..acc])
|
||||
<<92, cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
|
||||
<<cp:utf8_codepoint, rest:bits>> -> do_string(rest, [cp, ..acc])
|
||||
source -> Error(InvalidUtf8Character(source))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_number(source: String) -> Option(Parsed(SExpr)) {
|
||||
case number_char(source) {
|
||||
Ok(#(char, rest)) -> Some(do_number(rest, char))
|
||||
Error(_) -> None
|
||||
}
|
||||
}
|
||||
|
||||
fn do_number(source: String, result: String) -> Parsed(SExpr) {
|
||||
case number_char(source) {
|
||||
Ok(#(char, rest)) -> do_number(rest, result <> char)
|
||||
Error(_) -> {
|
||||
case int.parse(result) {
|
||||
Ok(n) -> Ok(#(Int(n), source))
|
||||
Error(Nil) -> {
|
||||
let result = case result {
|
||||
"-." <> rest -> "-0." <> rest
|
||||
"." <> rest -> "0." <> rest
|
||||
result -> result
|
||||
}
|
||||
case float.parse(result) {
|
||||
fn do_number(
|
||||
source: BitArray,
|
||||
acc: #(List(UtfCodepoint), Bool),
|
||||
) -> Parsed(SExpr) {
|
||||
case source, acc {
|
||||
<<>>, _ -> Error(UnexpectedEndOfFile)
|
||||
<<46 as i, _:bits>>, #(cps, True) ->
|
||||
Error(InvalidNumber(
|
||||
[utf_codepoint_unsafe(i), ..cps]
|
||||
|> list.reverse
|
||||
|> string.from_utf_codepoints,
|
||||
))
|
||||
<<46 as i, rest:bits>>, #(cps, False) ->
|
||||
do_number(rest, #([utf_codepoint_unsafe(i), ..cps], True))
|
||||
<<48 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<49 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<50 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<51 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<52 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<53 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<54 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<55 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<56 as i, rest:bits>>, #(cps, has_decimal)
|
||||
| <<57 as i, rest:bits>>, #(cps, has_decimal)
|
||||
-> do_number(rest, #([utf_codepoint_unsafe(i), ..cps], has_decimal))
|
||||
source, #(cps, has_decimal) -> {
|
||||
let str = cps |> list.reverse |> string.from_utf_codepoints
|
||||
case has_decimal {
|
||||
True ->
|
||||
case float.parse(str) {
|
||||
Ok(n) -> Ok(#(Float(n), source))
|
||||
Error(Nil) -> Error(InvalidNumber(result))
|
||||
Error(Nil) -> Error(InvalidNumber(str))
|
||||
}
|
||||
False ->
|
||||
case int.parse(str) {
|
||||
Ok(n) -> Ok(#(Int(n), source))
|
||||
Error(Nil) -> Error(InvalidNumber(str))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue