417 lines
11 KiB
Gleam
417 lines
11 KiB
Gleam
import gleam/string
|
|
import gleam/result
|
|
import gleam/list
|
|
|
|
/// The custom error type for the parser,
|
|
/// which can itself be parameterized by a user-defined error type.
|
|
/// The user-defined error type is useful for, for example,
|
|
/// adding a `int.parse` call into your parser pipeline.
|
|
/// See `try` for using this feature.
|
|
pub type ParseError(e) {
|
|
Unexpected(pos: Position, error: String)
|
|
UserError(pos: Position, error: e)
|
|
}
|
|
|
|
/// The type for positions within a string.
|
|
pub type Position {
|
|
Position(row: Int, col: Int)
|
|
}
|
|
|
|
/// The parser type, parameterized by the type it parses and
|
|
/// the user-defined error type it can return.
|
|
pub type Parser(a, e) {
|
|
Parser(
|
|
parse: fn(List(String), Position) ->
|
|
Result(#(a, List(String), Position), ParseError(e)),
|
|
)
|
|
}
|
|
|
|
/// Apply a parser to a list of graphemes (holding on to extra result info that is hidden from the library user).
|
|
fn run(
|
|
p: Parser(a, e),
|
|
src: List(String),
|
|
pos: Position,
|
|
) -> Result(#(a, List(String), Position), ParseError(e)) {
|
|
case p {
|
|
Parser(f) -> f(src, pos)
|
|
}
|
|
}
|
|
|
|
/// Apply a parser to a string.
|
|
pub fn go(p: Parser(a, e), src: String) -> Result(a, ParseError(e)) {
|
|
case run(p, string.to_graphemes(src), Position(1, 1)) {
|
|
Ok(#(x, _, _)) -> Ok(x)
|
|
Error(e) -> Error(e)
|
|
}
|
|
}
|
|
|
|
/// Get the current parser position.
|
|
pub fn pos() -> Parser(Position, e) {
|
|
Parser(fn(source, p) { Ok(#(p, source, p)) })
|
|
}
|
|
|
|
/// Parse a character if it matches the predicate.
|
|
pub fn satisfy(when pred: fn(String) -> Bool) -> Parser(String, e) {
|
|
Parser(fn(source, pos) {
|
|
let assert Position(row, col) = pos
|
|
case source {
|
|
[h, ..t] ->
|
|
case pred(h) {
|
|
True ->
|
|
case h {
|
|
"\n" -> Ok(#(h, t, Position(row + 1, 0)))
|
|
_ -> Ok(#(h, t, Position(row, col + 1)))
|
|
}
|
|
False -> Error(Unexpected(pos, h))
|
|
}
|
|
[] -> Error(Unexpected(pos, "EOF"))
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Parse a lowercase letter.
|
|
pub fn lowercase_letter() -> Parser(String, e) {
|
|
satisfy(when: fn(c) { string.contains("abcdefghijklmnopqrstuvwxyz", c) })
|
|
}
|
|
|
|
/// Parse an uppercase letter.
|
|
pub fn uppercase_letter() -> Parser(String, e) {
|
|
satisfy(when: fn(c) { string.contains("ABCDEFGHIJKLMNOPQRSTUVWXYZ", c) })
|
|
}
|
|
|
|
/// Parse a lowercase or uppercase letter.
|
|
pub fn letter() -> Parser(String, e) {
|
|
either(lowercase_letter(), uppercase_letter())
|
|
}
|
|
|
|
/// Parse a specific character.
|
|
pub fn char(c) -> Parser(String, e) {
|
|
satisfy(when: fn(c2) { c == c2 })
|
|
}
|
|
|
|
/// Parse a digit.
|
|
pub fn digit() -> Parser(String, e) {
|
|
satisfy(fn(c) { string.contains("0123456789", c) })
|
|
}
|
|
|
|
/// Parse a sequence of digits.
|
|
pub fn digits() -> Parser(String, e) {
|
|
many1_concat(digit())
|
|
}
|
|
|
|
/// Parse the first parser, or the second if the first fails.
|
|
pub fn either(p: Parser(a, e), q: Parser(a, e)) -> Parser(a, e) {
|
|
Parser(fn(source, pos) { result.or(run(p, source, pos), run(q, source, pos)) })
|
|
}
|
|
|
|
/// Parse with the first parser in the list that doesn't fail.
|
|
pub fn choice(ps: List(Parser(a, e))) -> Parser(a, e) {
|
|
Parser(fn(source, pos) {
|
|
case ps {
|
|
[] -> panic as "choice doesn't accept an empty list of parsers"
|
|
// TODO: should this be an Unexpected instead?
|
|
[p] -> run(p, source, pos)
|
|
[p, ..t] ->
|
|
case run(p, source, pos) {
|
|
Ok(#(x, r, pos2)) -> Ok(#(x, r, pos2))
|
|
Error(_) -> run(choice(t), source, pos)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Parse an alphanumeric character.
|
|
pub fn alphanum() -> Parser(String, e) {
|
|
either(digit(), letter())
|
|
}
|
|
|
|
/// Parse zero or more whitespace characters.
|
|
pub fn whitespace() -> Parser(String, e) {
|
|
many_concat(choice([char(" "), char("\t"), char("\n")]))
|
|
}
|
|
|
|
/// Parse one or more whitespace characters.
|
|
pub fn whitespace1() -> Parser(String, e) {
|
|
many1_concat(choice([char(" "), char("\t"), char("\n")]))
|
|
}
|
|
|
|
/// Keep trying the parser until it fails, and return the array of parsed results.
|
|
/// This cannot fail because it parses zero or more times!
|
|
pub fn many(p: Parser(a, e)) -> Parser(List(a), e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Error(_) -> Ok(#([], source, pos))
|
|
Ok(#(x, r, pos2)) ->
|
|
result.map(run(many(p), r, pos2), fn(res) {
|
|
#([x, ..res.0], res.1, res.2)
|
|
})
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Parse a certain string as many times as possible, returning everything that was parsed.
|
|
/// This cannot fail because it parses zero or more times!
|
|
pub fn many_concat(p: Parser(String, e)) -> Parser(String, e) {
|
|
many(p)
|
|
|> map(string.concat)
|
|
}
|
|
|
|
/// Keep trying the parser until it fails, and return the array of parsed results.
|
|
/// This can fail, because it must parse successfully at least once!
|
|
pub fn many1(p: Parser(a, e)) -> Parser(List(a), e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Error(e) -> Error(e)
|
|
Ok(#(x, r, pos2)) ->
|
|
result.map(run(many(p), r, pos2), fn(res) {
|
|
#([x, ..res.0], res.1, res.2)
|
|
})
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Parse a certain string as many times as possible, returning everything that was parsed.
|
|
/// This can fail, because it must parse successfully at least once!
|
|
pub fn many1_concat(p: Parser(String, e)) -> Parser(String, e) {
|
|
many1(p)
|
|
|> map(string.concat)
|
|
}
|
|
|
|
/// Do the first parser, ignore its result, then do the second parser.
|
|
pub fn seq(p: Parser(a, e), q: Parser(b, e)) -> Parser(b, e) {
|
|
use _ <- do(p)
|
|
q
|
|
}
|
|
|
|
/// Parse a sequence separated by the given separator parser.
|
|
pub fn sep(parser: Parser(a, e), by s: Parser(b, e)) -> Parser(List(a), e) {
|
|
use mb_a <- do(perhaps(parser))
|
|
case mb_a {
|
|
Ok(a) -> {
|
|
use rest <- do(many(right(s, parser)))
|
|
return([a, ..rest])
|
|
}
|
|
Error(Nil) -> return([])
|
|
}
|
|
}
|
|
|
|
/// Parse a sequence separated by the given separator parser.
|
|
/// This only succeeds if at least one element of the sequence was parsed.
|
|
pub fn sep1(parser: Parser(a, e), by s: Parser(b, e)) -> Parser(List(a), e) {
|
|
use sequence <- do(sep(parser, by: s))
|
|
case sequence {
|
|
[] -> fail()
|
|
_ -> return(sequence)
|
|
}
|
|
}
|
|
|
|
/// Do `p`, then apply `f` to the result if it succeeded.
|
|
pub fn map(p: Parser(a, e), f: fn(a) -> b) -> Parser(b, e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(#(x, r, pos2)) -> Ok(#(f(x), r, pos2))
|
|
Error(e) -> Error(e)
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Do `p`, the apply `f` to the result if it succeeded.
|
|
/// `f` itself can fail with the user-defined error type,
|
|
/// and if it does the result is a `UserError` with the error.
|
|
pub fn try(p: Parser(a, e), f: fn(a) -> Result(b, e)) -> Parser(b, e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(#(x, r, pos2)) ->
|
|
case f(x) {
|
|
Ok(a) -> Ok(#(a, r, pos2))
|
|
Error(e) -> Error(UserError(pos2, e))
|
|
}
|
|
Error(e) -> Error(e)
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Transform the user-defined error type
|
|
/// with a user-provided conversion function.
|
|
pub fn error_map(p: Parser(a, e), f: fn(e) -> f) -> Parser(a, f) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(res) -> Ok(res)
|
|
Error(e) ->
|
|
case e {
|
|
UserError(pos, e) -> Error(UserError(pos, f(e)))
|
|
Unexpected(pos, s) -> Error(Unexpected(pos, s))
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Try running a parser, but still succeed (with `Error(Nil)`) if it failed.
|
|
pub fn perhaps(p: Parser(a, e)) -> Parser(Result(a, Nil), e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(#(x, r, pos2)) -> Ok(#(Ok(x), r, pos2))
|
|
Error(_) -> Ok(#(Error(Nil), source, pos))
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Do each parser in the list, returning the result of the last parser.
|
|
pub fn all(ps: List(Parser(a, e))) -> Parser(a, e) {
|
|
case ps {
|
|
[p] -> p
|
|
[h, ..t] -> {
|
|
use _ <- do(h)
|
|
all(t)
|
|
}
|
|
_ -> panic as "all(parsers) doesn't accept an empty list of parsers"
|
|
}
|
|
// TODO: should this be an Unexpected instead?
|
|
}
|
|
|
|
/// Parse an exact string of characters.
|
|
pub fn string(s: String) -> Parser(String, e) {
|
|
case string.pop_grapheme(s) {
|
|
Ok(#(h, t)) -> {
|
|
use c <- do(char(h))
|
|
use rest <- do(string(t))
|
|
return(c <> rest)
|
|
}
|
|
Error(_) -> return("")
|
|
}
|
|
}
|
|
|
|
/// Negate a parser: if it succeeds, this fails, and vice versa.
|
|
/// Example: `seq(string("if"), not(either(alphanum(), char("_"))))`
|
|
pub fn not(p: Parser(a, e)) -> Parser(Nil, e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(_) -> Error(Unexpected(pos, ""))
|
|
// todo: better error message here (add a label system)
|
|
Error(_) -> Ok(#(Nil, source, pos))
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Parses successfully only when at the end of the input string.
|
|
pub fn end() -> Parser(Nil, e) {
|
|
Parser(fn(source, pos) {
|
|
case source {
|
|
[] -> Ok(#(Nil, source, pos))
|
|
[h, ..] -> Error(Unexpected(pos, h))
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Run a parser as normal, but the parser itself isn't evaluated until it is used.
|
|
/// This is needed for recursive grammars, such as `E := n | E + E` where `n` is a number.
|
|
/// Example: `lazy(digit)` instead of `digit()`.
|
|
pub fn lazy(p: fn() -> Parser(a, e)) -> Parser(a, e) {
|
|
Parser(fn(source, pos) { run(p(), source, pos) })
|
|
}
|
|
|
|
/// A monadic bind for pleasant interplay with gleam's `use` syntax.
|
|
/// example:
|
|
/// ```
|
|
/// fn identifier() -> Parser(String, e) {
|
|
/// use pos <- do(pos())
|
|
/// use first <- do(lowercase_letter())
|
|
/// use rest <- do(many(alt(alphanum(), char("_"))))
|
|
/// return(Ident(pos, first <> string.concat(rest)))
|
|
/// }
|
|
/// ```
|
|
pub fn do(p: Parser(a, e), f: fn(a) -> Parser(b, e)) -> Parser(b, e) {
|
|
Parser(fn(source, pos) {
|
|
case run(p, source, pos) {
|
|
Ok(#(x, r, pos2)) -> run(f(x), r, pos2)
|
|
Error(e) -> Error(e)
|
|
}
|
|
})
|
|
}
|
|
|
|
/// A monadic return for pleasant interplay with gleam's `use` syntax.
|
|
/// see `do` for more details and an example.
|
|
/// This is redundant if the last `do` is a `map` instead.
|
|
/// But I prefer using it, stylistically.
|
|
pub fn return(x) {
|
|
Parser(fn(source, pos) { Ok(#(x, source, pos)) })
|
|
}
|
|
|
|
/// Immediately fail regardless of the next input
|
|
pub fn fail() -> Parser(a, e) {
|
|
Parser(fn(source, pos) {
|
|
case source {
|
|
[] -> Error(Unexpected(pos, "EOF"))
|
|
[h, ..] -> Error(Unexpected(pos, h))
|
|
}
|
|
})
|
|
}
|
|
|
|
pub fn left(p: Parser(a, e), q: Parser(b, e)) -> Parser(a, e) {
|
|
use x <- do(p)
|
|
use _ <- do(q)
|
|
return(x)
|
|
}
|
|
|
|
pub fn right(p: Parser(a, e), q: Parser(b, e)) -> Parser(b, e) {
|
|
use _ <- do(p)
|
|
use x <- do(q)
|
|
return(x)
|
|
}
|
|
|
|
pub fn pair(p: Parser(a, e), q: Parser(b, e)) -> Parser(#(a, b), e) {
|
|
use x <- do(p)
|
|
use y <- do(q)
|
|
return(#(x, y))
|
|
}
|
|
|
|
pub fn between(
|
|
p: Parser(a, e),
|
|
q: Parser(b, e),
|
|
r: Parser(c, e),
|
|
) -> Parser(a, e) {
|
|
use _ <- do(q)
|
|
use x <- do(p)
|
|
use _ <- do(r)
|
|
return(x)
|
|
}
|
|
|
|
pub fn concat(p: Parser(String, e), q: Parser(String, e)) -> Parser(String, e) {
|
|
use x <- do(p)
|
|
use y <- do(q)
|
|
return(x <> y)
|
|
}
|
|
|
|
pub fn choice_char(vs: List(String)) -> Parser(String, e) {
|
|
vs
|
|
|> list.map(char)
|
|
|> choice
|
|
}
|
|
|
|
pub fn perhaps_default(p: Parser(a, e), d: a) -> Parser(a, e) {
|
|
use x <- do(perhaps(p))
|
|
return(result.unwrap(x, d))
|
|
}
|
|
|
|
pub fn perhaps_empty(p: Parser(String, e)) -> Parser(String, e) {
|
|
perhaps_default(p, "")
|
|
}
|
|
|
|
pub fn to(p: Parser(a, e), v: b) -> Parser(b, e) {
|
|
use _ <- do(p)
|
|
return(v)
|
|
}
|
|
|
|
pub fn any() -> Parser(String, e) {
|
|
Parser(fn(source, pos) {
|
|
let assert Position(row, col) = pos
|
|
case source {
|
|
[h, ..t] ->
|
|
case h {
|
|
"\n" -> Ok(#(h, t, Position(row + 1, 0)))
|
|
_ -> Ok(#(h, t, Position(row, col + 1)))
|
|
}
|
|
[] -> Error(Unexpected(pos, "EOF"))
|
|
}
|
|
})
|
|
}
|