elixir
!
"
#


Interactive Elixir (1.1.0)
iex(1)> import Paco
nil
iex(2)> import Paco.Parser
nil
iex(3)> parse("a", lit("a"))
{:ok, "a"}
iex(4)> parse("a", lit("a"), format: :raw)
%Paco.Success{
from: {0, 1, 1},
to: {0, 1, 1},
at: {1, 1, 2},
result: "a",
tail: "",
...}
iex(5)> parse("aaa", lit("aaa"))
{:ok, "aaa"}
iex(6)> "aaa" |> parse(lit("aaa"))
{:ok, "aaa"}
iex(7)> "aaa" |> parse(lit("a"))
{:ok, "a"}
iex(8)> "aaa" |> parse(lit("a"), format: :raw)
%Paco.Success{
from: {0, 1, 1},
to: {0, 1, 1},
at: {1, 1, 2},
result: "a",
tail: "aa",
...}
iex(9)> "b" |> parse(lit("a"))
{:error, "expected "a" at 1:1 but got "b""}
iex(10)> "b" |> parse(lit("a"), format: :raw)
%Paco.Failure{
at: {0, 1, 1},
expected: "a",
tail: "b",
...}
iex(1)> "aaa" |> parse(any)
{:ok, "a"}
iex(2)> "aaa" |> parse(any(1))
{:ok, "a"}
iex(3)> "aaa" |> parse(any(2))
{:ok, "aa"}
iex(4)> "a" |> parse(any(2))
{:error, "expected exactly 2 characters at 1:1 but got "a""}
iex(5)> "aaa" |> parse(any(at_least: 2))
{:ok, "aaa"}
iex(6)> "aaa" |> parse(any(at_most: 2))
{:ok, "aa"}
iex(1)> "bbabcd" |> parse(while("abc"))
{:ok, "bbabc"}
iex(2)> "xxx" |> parse(while("abc"))
{:ok, ""}
iex(3)> "xxx" |> parse(while("abc", at_least: 2))
{:error,
"expected at least 2 characters in alphabet "abc" at 1:1
but got "xx""}
iex(4)> import Paco.ASCII, only: [lowercase?: 1]
iex(5)> "abCD" |> parse(while(&lowercase?/1))
{:ok, "ab"}
iex(6)> "abCD" |> parse(while(&lowercase?/1, at_least: 3))
{:error,
"expected at least 3 lowercase characters at 1:1
but got "abC""}
iex(1)> "abc" |> parse(until("c"))
{:ok, "ab"}
iex(2)> "ab\\cdc" |> parse(until("c", escaped_with: "\\"))
{:ok, "abcd"}
iex(3)> "ab\\cdc" |> parse(until("c", escaped_with: "\\",
keep_escape: true))
{:ok, "ab\\cd"}
iex(4)> "abc" |> parse(until("d"))
{:error, "expected something ended by "d" at 1:1
but got "abc""}
iex(5)> "abc" |> parse(until("d"), eof: true)
{:ok, "abc"}
iex(1)> "ab" |> parse(sequence_of([lit("a"), lit("b")]))
{:ok, ["a", "b"]}
iex(2)> "ac" |> parse(sequence_of([lit("a"), lit("b")]))
{:error, "expected "b" at 1:2 but got "c""}
iex(3)> ab = sequence_of([lit("a"), lit("b")])
%Paco.Parser{...}
iex(4)> "abc" |> parse(sequence_of([ab, lit("c")]))
{:ok, [["a", "b"], "c"]}
iex(5)> "xxx" |> parse(sequence_of([ab, lit("c")]))
{:error, "expected "a" at 1:1 but got "x""}
iex(6)> "axx" |> parse(sequence_of([ab, lit("c")]))
{:error, "expected "b" at 1:2 but got "x""}
iex(7)> "abx" |> parse(sequence_of([ab, lit("c")]))
{:error, "expected "c" at 1:3 but got "x""}
iex(1)> "a" |> parse(one_of([lit("a"), lit("b")]))
{:ok, "a"}
iex(2)> "b" |> parse(one_of([lit("a"), lit("b")]))
{:ok, "b"}
iex(4)> # farthest failure (higher rank) wins
nil
iex(3)> "ab" |> parse(one_of([lit("ac"), lit("bc")]))
{:error, "expected "ac" at 1:1 but got "ab""}
iex(6)> # failures with same rank are composed
nil
iex(5)> "ab" |> parse(one_of([lit("ac"), lit("ad")]))
{:error, "expected one of ["ac", "ad"] at 1:1
but got "ab""}
iex(1)> "aaa" |> parse(repeat(lit("a")))
{:ok, ["a", "a", "a"]}
iex(2)> "aaa" |> parse(repeat(lit("a"), 2))
{:ok, ["a", "a"]}
iex(4)> "aaa" |> parse(repeat(lit("a"), at_most: 2))
{:ok, ["a", "a"]}
iex(3)> "aaa" |> parse(repeat(lit("a"), at_least: 4))
{:error, "expected "a" at 1:2 but got the end of input"}
iex(6)> "abba" |> parse(repeat(one_of([lit("a"), lit("b")])))
{:ok, ["a", "b", "b", "a"]}
defmodule Paco.ASCII do
!
@upper ["A","B","C","D","E",...,"Z"]
!
@classes [... {:upper, :upper?, @upper}, ...]
for {class, is_class, elements} <- @classes do
def unquote(class)(), do: unquote(elements)
for element <- elements do
def unquote(is_class)(<<unquote(element)>>), do: true
end
def unquote(is_class)(_), do: false
end
!
# def upper, do: @upper
# def upper?("A"), do: true
# def upper?("B"), do: true
# ...
# def upper?(_), do: false
ws = while(ASCII.ws)
!
hello = lit("Hello")
separator = sequence_of([ws, lit(","), ws])
what = while(ASCII.letter, at_least: 1)
terminator = sequence_of([ws, lit("!")])
!
greetings = sequence_of([hello, separator, what, terminator])
!
parse("Hello,World!", greetings) |> IO.inspect
# {:ok, ["Hello", ["", ",", ""], "World", ["", "!"]]}
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["Hello", ["", ",", " "], "BEAM", ["", "!"]]}
# Good, not great: skip everything that is not interesting
!
ws = while(ASCII.ws)
!
hello = lit("Hello") |> skip
separator = sequence_of([ws, lit(","), ws]) |> skip
what = while(ASCII.letter, at_least: 1)
terminator = sequence_of([ws, lit("!")]) |> skip
!
greetings = sequence_of([hello, separator, what, terminator])
!
parse("Hello,World!", greetings) |> IO.inspect
# {:ok, ["World"]}
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
# Not everyone is so loud, `!` should be optional
!
ws = while(ASCII.ws)
!
hello = lit("Hello") |> skip
separator = sequence_of([ws, lit(","), ws]) |> skip
what = while(ASCII.letter, at_least: 1)
terminator = sequence_of([ws, lit("!")]) |> maybe
!
greetings = sequence_of([hello, separator, what, terminator])
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
!
parse("Hello, BEAM", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
# Let's get rid of non significant whitespaces with lex(s)
!
# In module Paco.Parser...
!
# Matches the literal `s`, passed through `surrounded_by` with `maybe(whitespaces)`.
# NOTE(review): `surrounded_by` is called here with one boundary parser, while the
# only clause shown below takes `left` and `right` — presumably another clause exists.
parser lex(s),
as: lit(s) |> surrounded_by(maybe(whitespaces))
!
# Runs `left`, `p` and `right` in sequence; `left` and `right` are wrapped in `skip`.
parser surrounded_by(p, left, right),
as: sequence_of([skip(left), p, skip(right)])
# Use lex Luke!
!
ws = while(ASCII.ws)
!
hello = lit("Hello") |> skip
what = while(ASCII.letter, at_least: 1)
separator = lex(",") |> skip
terminator = lex("!") |> maybe
!
greetings = sequence_of([hello, separator, what, terminator])
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
!
parse("Hello, BEAM", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
# It's common to have something non significant
# that follows or precedes something significant
!
# In module Paco.Parser...
!
# Runs `p` and then `right` in sequence; `right` is wrapped in `skip`.
parser followed_by(p, right),
as: sequence_of([p, skip(right)])
!
# Runs `left` and then `p` in sequence; `left` is wrapped in `skip`.
# The second parameter is named `left` because the body refers to `left`.
parser preceded_by(p, left),
  as: sequence_of([skip(left), p])
# An alternative and shorter version
!
what = while(ASCII.letter, at_least: 1)
!
greetings = what
|> preceded_by(lit("Hello")
|> followed_by(lex(",")))
|> followed_by(maybe(lex("!")))
!
!
parse("Hello, BEAM!", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
!
parse("Hello, BEAM", greetings) |> IO.inspect
# {:ok, ["BEAM"]}
# Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(operator)
!
parse("1", expression) |> IO.inspect
# {:ok, ["1"]}
!
parse("1 + 2", expression) |> IO.inspect
# {:ok, ["1", "2"]}
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, ["1", "2", "3"]}
!
# Small problem... to compute the value we need the operators!
# Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(keep(operator))
!
parse("1", expression) |> IO.inspect
# {:ok, ["1"]}
!
parse("1 + 2", expression) |> IO.inspect
# {:ok, ["1", "+", "2"]}
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, ["1", "+", "2", "-", "3"]}
!
# Ok, but we need numbers not strings
# In module Paco.Parser...
# Builds a parser that runs `p` and, when `p` succeeds, hands its result to `f`.
parser bind(p, f) do
fn state, _ ->
case p.parse.(state, p) do
%Success{result: result} = success ->
# `f` receives the parsed result and the whole success struct.
case f.(result, success) do
# A Failure or a Success returned by `f` is passed through unchanged.
%Failure{} = failure ->
failure
%Success{} = success ->
success
# Any other value replaces the `result` field of the original success.
result ->
%Success{success|result: result}
end
# When `p` fails, the failure is returned as-is and `f` is never called.
%Failure{} = failure ->
failure
end
end
end
# Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
operator = one_of([lex("+"), lex("-")])
!
expression = number |> separated_by(keep(operator))
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, [1, "+", 2, "-", 3]}
!
# Missing only the last step... compute the result :-)
# Parse a sequence of numbers separated by `+` or `-`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
operator = one_of([lex("+"), lex("-")])
!
expression = number
|> separated_by(keep(operator))
|> bind(&Paco.Transform.separated_by(&1,
fn("+", n, m) -> n + m
("-", n, m) -> n - m
end))
!
!
parse("1 + 2 - 3", expression) |> IO.inspect
# {:ok, 0}
# Parse a{n}b{n}c{n} where n ∈ ℕ
!
# If you knew the `n` (ex. 3) it would be easy
!
p = sequence_of([while("a", 3), while("b", 3), while("c", 3)])
!
parse("aaabbbccc", p) |> IO.inspect
# {:ok, ["aaa", "bbb", "ccc"]}
!
# We need to be able to peek ahead and then create a parser
# with that knowledge
# In module Paco.Parser...
!
# Runs `p` and, on success, reports its result with the position and text that
# the state had before `p` ran.
parser peek(box(p)) do
fn %State{at: at, text: text} = state, _ ->
case p.parse.(state, p) do
%Success{result: result} ->
# Keep only the result: `from`, `to` and `at` are all set to the starting `at`
# and `tail` is set to the starting `text`.
%Success{from: at, to: at, at: at,
tail: text,
result: result}
# A failure of `p` is returned unchanged.
%Failure{} = failure ->
failure
end
end
end
# In module Paco.Parser...
!
# Binds `p` to `f`, then binds again with a function that calls `parse` on the
# value it receives.
# NOTE(review): presumably `f` returns a parser and `s` is the parser state — the
# three-argument callback does not match the two-argument call shown in `bind`; confirm.
parser then(p, f) when is_function(f), as:
bind(p, f)
|> bind(fn(p, _, s) -> p.parse.(s, p) end)
# Parse a{n}b{n}c{n} where n ∈ ℕ
!
p = peek(while("a"))
|> then(fn(a) ->
len = String.length(a)
sequence_of([while("a", len),
while("b", len),
while("c", len)])
end)
!
parse("aaabbbccc", p) |> IO.inspect
# {:ok, ["aaa", "bbb", "ccc"]}
!
parse("aaabbccc", p) |> IO.inspect
# {:error,
# "expected exactly 3 characters in alphabet "b" at 1:4
# but got "bbc""}
# An `element` is a word beginning with one uppercase letter
# followed by zero or more lowercase letters
element = sequence_of([while(ASCII.upper, 1),
while(ASCII.lower)])
!
# A `quantity` is a number greater than zero
# If the quantity is omitted assume the value of 1 as default
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
# A `reference` is an element optionally followed by a quantity
reference = sequence_of([element, quantity])
!
formula = repeat(reference, at_least: 1)
parse("H2O", formula) |> IO.inspect
# {:ok, [[["H", ""], 2], [["O", ""], 1]]}
!
# That's right but the output format sucks!
!
# What we really want is something like
# {:ok, [%{element: "H", quantity: 2},
# %{element: "O", quantity: 1}]}
!
# Is that possible???
defprotocol Paco.Parsable do
@moduledoc """
A protocol that converts terms into Paco parsers
"""
@fallback_to_any true
@doc """
Returns a parser that parses `t` and keeps the shape of `t`
"""
@spec to_parser(t) :: Paco.Parser.t
def to_parser(t)
end
defimpl Paco.Parsable, for: BitString do
  import Paco.Parser

  # A binary is parsed as a literal; any other bitstring has no parser form and
  # is reported as an undefined protocol implementation.
  def to_parser(bits) do
    if is_binary(bits) do
      lit(bits)
    else
      raise Protocol.UndefinedError, protocol: @protocol, value: bits
    end
  end
end
iex(1)> "aaa" |> parse(lit("aaa"))
{:ok, "aaa"}
iex(2)> "aaa" |> parse("aaa")
{:ok, "aaa"}
defimpl Paco.Parsable, for: List do
  import Paco.Parser

  # A list is handed to `sequence_of` as-is.
  def to_parser(elements), do: sequence_of(elements)
end
iex(1)> "ab" |> parse(sequence_of([lit("a"), lit("b")]))
{:ok, ["a", "b"]}
iex(2)> "ab" |> parse(sequence_of(["a", "b"]))
{:ok, ["a", "b"]}
iex(3)> "ab" |> parse(["a", "b"])
{:ok, ["a", "b"]}
defimpl Paco.Parsable, for: Tuple do
  import Paco.Parser

  # A tuple is parsed as the sequence of its elements, and the resulting list
  # is converted back into a tuple.
  def to_parser(tuple) do
    elements = Tuple.to_list(tuple)
    sequence = sequence_of(elements)
    bind(sequence, &List.to_tuple/1)
  end
end
iex(1)> "ab" |> parse({"a", "b"})
{:ok, {"a", "b"}}
defimpl Paco.Parsable, for: Map do
  import Paco.Parser

  # A map is parsed as the sequence of its values; the parsed results are then
  # zipped back with the original keys to rebuild a map.
  # The parameter is named `map` because the body reads `map`.
  def to_parser(map) do
    {keys, values} = {Map.keys(map), Map.values(map)}
    sequence_of(values)
    |> bind(&(Enum.zip(keys, &1) |> Enum.into(Map.new)))
  end
end
iex(1)> "ab" |> parse(%{first: "a", last: "b"})
{:ok, %{first: "a", last: "b"}}
element = [while(ASCII.upper, 1), while(ASCII.lower)]
!
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
reference = %{element: element, quantity: quantity}
!
formula = repeat(reference, at_least: 1)
!
parse("H2O", formula) |> IO.inspect
# {:ok, [%{element: ["H", ""], quantity: 2},
# %{element: ["O", ""], quantity: 1}]}
!
# Almost...
# parser join(p, joiner \\ ""),
# as: bind(p, &Enum.join(&1, joiner))
!
element = [while(ASCII.upper, 1), while(ASCII.lower)]
|> join
!
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
reference = %{element: element, quantity: quantity}
!
formula = repeat(reference, at_least: 1)
!
parse("H2O", formula) |> IO.inspect
# {:ok, [%{element: "H", quantity: 2},
# %{element: "O", quantity: 1}]}
!
# Yahoooo!!!
element = [while(ASCII.upper, 1), while(ASCII.lower)]
|> join
!
# But a `quantity` is a number greater than zero!
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
!
reference = %{element: element, quantity: quantity}
!
formula = repeat(reference, at_least: 1)
!
parse("Na0", formula) |> IO.inspect
# {:ok, [%{element: "Na", quantity: 0}]}
!
# Ouch...
# ...
# A `quantity` is a number greater than zero
quantity = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
|> maybe(default: 1)
|> only_if(&(&1 > 0))
!
# ...
!
parse("Na0", formula) |> IO.inspect
# {:error, "0 is not acceptable at 1:3"}
# ...
# A `quantity` is a number greater than zero
!
error_message = "quantity must be greater than 0 %AT%"

# Pairs the check on the parsed value with the message shown in the expected
# output below when the check fails.
greater_than_zero = &{&1 > 0, error_message}

quantity = while(ASCII.digit, at_least: 1)
           |> bind(&String.to_integer/1)
           |> maybe(default: 1)
           |> only_if(greater_than_zero)

# ...

parse("Na0", formula) |> IO.inspect
# {:error, "quantity must be greater than 0 at 1:3"}
# Parse something like `(1, (2, 3))`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
# We need to name something that is not yet defined,
# actually we need to name something in its definition
!
list = one_of([number, ???])
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.round_brackets)
# In module Paco.Parser...
!
# Builds a parser whose parse function calls `f` with the parser itself (`this`),
# boxes what `f` returns and runs it on the current state.
parser recursive(f) do
fn state, this ->
box(f.(this)).parse.(state, this)
end
end
# Parse something like `(1, (2, 3))`
!
number = while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
!
list = recursive(fn(list) ->
one_of([number, list])
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.round_brackets)
end)
!
parse("(1, 2)", list) |> IO.inspect
# {:ok, [1, 2]}
!
parse("(1, (2, 3))", list) |> IO.inspect
# {:ok, [1, [2, 3]]}
defmodule ListOfLists do
use Paco
alias Paco.ASCII
!
parser number do
while(ASCII.digit, at_least: 1)
|> bind(&String.to_integer/1)
end
!
parser list do
one_of([number, list])
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.round_brackets)
end
end
!
Paco.parse("1", ListOfLists.number) |> IO.inspect
# {:ok, 1}
# In module Paco...
!
# Code injected into a module that calls `use Paco`.
defmacro __using__(_) do
quote do
import Paco.Macro.ParserModuleDefinition
import Paco.Parser
!
# `:paco_parsers` is registered as an accumulating attribute of the using module.
Module.register_attribute(__MODULE__,
:paco_parsers,
accumulate: true)
!
# Paco's `__before_compile__` hook runs for the using module.
@before_compile Paco
end
end
# In module Paco...
!
# Injects `parse/2` and `parse!/2` into the module being compiled. Both delegate
# to Paco using the parser picked from the module's `:paco_parsers` attribute
# (the list is reversed before picking).
defmacro __before_compile__(env) do
  root_parser = pick_root_parser_between(
    Module.get_attribute(env.module, :paco_parsers)
    |> Enum.reverse
  )

  quote do
    # `opts` is optional and defaults to an empty list.
    def parse(s, opts \\ []) do
      Paco.parse(s, apply(__MODULE__, unquote(root_parser), []), opts)
    end

    def parse!(s, opts \\ []) do
      Paco.parse!(s, apply(__MODULE__, unquote(root_parser), []), opts)
    end
  end
end
# Everything we saw until now works with streams of text!
!
["a", "b", "", "ab", "", "a", "", "", "b", "", ""]
|> Paco.Stream.parse(lit("ab"))
|> Enum.to_list
|> IO.inspect
# ["ab", "ab", "ab"]
!
[~s|{"foo|, ~s|": "bar"|, ~s|}[1, 2|, ~s|, 3]|]
|> Paco.Parser.JSON.stream
|> Enum.to_list
|> IO.inspect
# [%{"foo" => "bar"}, [1, 2, 3]]
# Parser for the literal string `s`.
parser lit(s) do
fn %State{at: from, text: text, stream: stream} = state, this ->
case Paco.String.consume(text, s, from) do
# Four-element result: build a success whose result is `s` itself.
{tail, _, to, at} ->
%Success{from: from, to: to, at: at, tail: tail, result: s}
!
# Not enough text while `stream` is a pid: ask for more and try again.
{:not_enough, _, _, _, _} when is_pid(stream) ->
wait_for_more_and_continue(state, this)

# Any other five-element result is a failure at the starting position, ranked
# `n + 1` from the last element of the tuple.
{_, _, _, _, {n, _, _}} ->
%Failure{at: from, tail: text, expected: s, rank: n+1}
end
end
end
!
# Asks the stream process for more text, then re-runs the parser `this` once
# the stream has answered.
defp wait_for_more_and_continue(state, this) do
  %State{text: text, stream: stream} = state
  send(stream, {self(), :more})

  receive do
    :halted ->
      # The stream is finished: clearing `stream` puts the parser in non-stream
      # mode, i.e. it runs knowing that no further input will arrive.
      finished = %State{state | stream: nil}
      this.parse.(finished, this)

    {:load, more_text} ->
      # Append the newly loaded text to what was already buffered.
      extended = %State{state | text: text <> more_text}
      this.parse.(extended, this)
  end
end
defmodule Paco.Parser.JSON do
alias Paco.ASCII
use Paco
!
root parser all, do: one_of([object, array])
!
parser object do
pair(string, value, separated_by: ASCII.colon)
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.curly_brackets)
|> bind(&to_map/1)
end
!
parser array do
value
|> separated_by(ASCII.comma)
|> surrounded_by(ASCII.square_brackets)
end
# ...
defmodule Paco.Parser.JSON do
# ...
!
parser value do
one_of([
string,
number,
object,
array,
literal_true,
literal_false,
literal_null])
end
!
# A JSON string: the text between double quotes, with escape sequences
# replaced afterwards by `replace_escapes_in_string/1`.
parser string do
  # JSON escapes characters inside strings with a backslash.
  between(ASCII.double_quotes, escaped_with: "\\", strip: false)
  |> bind(&replace_escapes_in_string/1)
end
# ...
defmodule Paco.Parser.JSON do
# ...
!
parser literal_true, do: lit("true") |> replace_with(true)
parser literal_false, do: lit("false") |> replace_with(false)
parser literal_null, do: lit("null") |> replace_with(nil)
!
# ...
end
Settings:
duration: 1.0 s
!
## Paco.Benchmark.JSON
[00:21:14] 1/4: poison small
[00:21:16] 2/4: poison medium
[00:21:18] 3/4: paco small
[00:21:21] 4/4: paco medium
!
Finished in 8.78 seconds
!
## Paco.Benchmark.JSON
poison small 100000 14.72 µs/op
poison medium 10000 144.58 µs/op
paco small 5000 493.32 µs/op
paco medium 500 4152.14 µs/op
✓
✓
☕
☕
👎
$
Parse Everything With Elixir

Parse Everything With Elixir

  • 1.
  • 2.
  • 3.
  • 10.
    Interactive Elixir (1.1.0) iex(1)>import Paco nil iex(2)> import Paco.Parser nil iex(3)> parse("a", lit("a")) {:ok, "a"} iex(4)> parse("a", lit("a"), format: :raw) %Paco.Success{ from: {0, 1, 1}, to: {0, 1, 1}, at: {1, 1, 2}, result: "a", tail: "", ...}
  • 11.
    iex(5)> parse("aaa", lit("aaa")) {:ok,"aaa"} iex(6)> "aaa" |> parse(lit("aaa")) {:ok, "aaa"} iex(7)> "aaa" |> parse(lit("a")) {:ok, "a"} iex(8)> "aaa" |> parse(lit("a"), format: :raw) %Paco.Success{ from: {0, 1, 1}, to: {0, 1, 1}, at: {1, 1, 2}, result: "a", tail: "aa", ...}
  • 12.
    iex(9)> "b" |>parse(lit("a")) {:error, "expected "a" at 1:1 but got "b""} iex(10)> "b" |> parse(lit("a"), format: :raw) %Paco.Failure{ at: {0, 1, 1}, expected: "a", tail: "b", ...}
  • 13.
    iex(1)> "aaa" |>parse(any) {:ok, "a"} iex(2)> "aaa" |> parse(any(1)) {:ok, "a"} iex(3)> "aaa" |> parse(any(2)) {:ok, "aa"} iex(4)> "a" |> parse(any(2)) {:error, "expected exactly 2 characters at 1:1 but got "a""} iex(5)> "aaa" |> parse(any(at_least: 2)) {:ok, "aaa"} iex(6)> "aaa" |> parse(any(at_most: 2)) {:ok, "aa"}
  • 14.
    iex(1)> "bbabcd" |>parse(while("abc")) {:ok, "bbabc"} iex(2)> "xxx" |> parse(while("abc")) {:ok, ""} iex(3)> "xxx" |> parse(while("abc", at_least: 2)) {:error, "expected at least 2 characters in alphabet "abc" at 1:1 but got "xx""} iex(4)> import Paco.ASCII, only: [lowercase?: 1] iex(5)> "abCD" |> parse(while(&lowercase?/1)) {:error, "ab"} iex(6)> "abCD" |> parse(while(&lowercase?/1, at_least: 3)) {:error, "expected at least 3 lowercase characters at 1:1 but got "abC""}
  • 15.
    iex(1)> "abc" |>parse(until("c")) {:ok, "ab"} iex(2)> "abcdc" |> parse(until("c", escaped_with: "")) {:ok, "abcd"} iex(3)> "abcdc" |> parse(until("c", escaped_with: "", keep_escape: true)) {:ok, "abcd"} iex(4)> "abc" |> parse(until("d")) {:error, "expected something ended by "d" at 1:1 but got "abc""} iex(5)> "abc" |> parse(until("d"), eof: true) {:error, "abc"}
  • 16.
    iex(1)> "ab" |>parse(sequence_of([lit("a"), lit("b")])) {:ok, ["a", "b"]} iex(2)> "ac" |> parse(sequence_of([lit("a"), lit("b")])) {:error, "expected "b" at 1:2 but got "c""}} iex(3)> ab = sequence_of([lit("a"), lit("b")]) %Paco.Parser{...} iex(4)> "abc" |> parse(sequence_of([ab, lit("c")])) {:ok, [["a", "b"], "c"]} iex(5)> "xxx" |> parse(sequence_of([ab, lit("c")])) {:error, "expected "a" at 1:1 but got "x""} iex(6)> "axx" |> parse(sequence_of([ab, lit("c")])) {:error, "expected "b" at 1:2 but got "x""} iex(7)> "abx" |> parse(sequence_of([ab, lit("c")])) {:error, "expected "c" at 1:3 but got "x""}
  • 17.
    iex(1)> "a" |>parse(one_of([lit("a"), lit("b")])) {:ok, "a"} iex(2)> "b" |> parse(one_of([lit("a"), lit("b")])) {:ok, "b"} iex(4)> # farthest failure (higher rank) wins nil iex(3)> "ab" |> parse(one_of([lit("ac"), lit("bc")])) {:error, "expected "ac" at 1:1 but got "ab""} iex(6)> # failures with same rank are composed nil iex(5)> "ab" |> parse(one_of([lit("ac"), lit("ad")])) {:error, "expected one of ["ac", "ad"] at 1:1 but got "ab""}
  • 18.
    iex(1)> "aaa" |>parse(repeat(lit("a"))) {:ok, ["a", "a", "a"]} iex(2)> "aaa" |> parse(repeat(lit("a"), 2)) {:ok, ["a", "a"]} iex(4)> "aaa" |> parse(repeat(lit("a"), at_most: 2)) {:ok, ["a", "a"]} iex(3)> "aaa" |> parse(repeat(lit("a"), at_least: 4)) {:error, ""expected "a" at 1:2 but got the end of input"} iex(6)> "abba" |> parse(repeat(one_of([lit("a"), lit("b")]))) {:ok, ["a", "b", "b", "a"]}
  • 19.
    defmodule Paco.ASCII do ! @upper["A","B","C","D","E",...,"Z"] ! @classes [... {:upper, :upper?, @upper}, ...] for {class, is_class, elements} <- @classes do def unquote(class)(), do: unquote(elements) for element <- elements do def unquote(is_class)(<<unquote(element)>>), do: true end def unquote(is_class)(_), do: false end ! # def upper, do: @upper # def upper?("A"), do: true # def upper?("B"), do: true # ... # def upper?(_), do: false
  • 20.
    ws = while(ASCII.ws) ! hello= lit("Hello") separator = sequence_of([ws, lit(","), ws]) what = while(ASCII.letter, at_least: 1) terminator = sequence_of([ws, lit("!")]) ! greetings = sequence_of([hello, separator, what, terminator]) ! parse("Hello,World!", greetings) |> IO.inspect # {:ok, ["Hello", ["", ",", ""], "World", ["", "!"]]} ! parse("Hello, BEAM!", greetings) |> IO.inspect # {:ok, ["Hello", ["", ",", " "], "BEAM", ["", "!"]]}
  • 21.
    # Good, notgreat: skip everything that is not interesting ! ws = while(ASCII.ws) ! hello = lit("Hello") |> skip separator = sequence_of([ws, lit(","), ws]) |> skip what = while(ASCII.letter, at_least: 1) terminator = sequence_of([ws, lit("!")]) |> skip ! greetings = sequence_of([hello, separator, what, terminator]) ! parse("Hello,World!", greetings) |> IO.inspect # {:ok, ["World"]} ! parse("Hello, BEAM!", greetings) |> IO.inspect # {:ok, ["BEAM"]}
  • 22.
    # Not everyoneare so loud, `!` should be optional ! ws = while(ASCII.ws) ! hello = lit("Hello") |> skip separator = sequence_of([ws, lit(","), ws]) |> skip what = while(ASCII.letter, at_least: 1) terminator = sequence_of([ws, lit("!")]) |> maybe ! greetings = sequence_of([hello, separator, what, terminator]) ! parse("Hello, BEAM!", greetings) |> IO.inspect # {:ok, ["BEAM"]} ! parse("Hello, BEAM", greetings) |> IO.inspect # {:ok, ["BEAM"]}
  • 23.
    # Let's getrid of non significant whitespaces with lex(s) ! # In module Paco.Parser... ! parser lex(s), as: lit(s) |> surrounded_by(maybe(whitespaces)) ! parser surrounded_by(p, left, right), as: sequence_of([skip(left), p, skip(right)])
  • 24.
    # Use lexLuke! ! ws = while(ASCII.ws) ! hello = lit("Hello") |> skip what = while(ASCII.letter, at_least: 1) separator = lex(",") |> skip terminator = lex("!") |> maybe ! greetings = sequence_of([hello, separator, what, terminator]) ! parse("Hello, BEAM!", greetings) |> IO.inspect # {:ok, ["BEAM"]} ! parse("Hello, BEAM", greetings) |> IO.inspect # {:ok, ["BEAM"]}
  • 25.
    # It's commonto have something non significant # that follows or precedes something significant ! # In module Paco.Parser... ! parser followed_by(p, right), as: sequence_of([p, skip(right)]) ! parser preceded_by(p, right), as: sequence_of([skip(left), p])
  • 26.
    # An alternativeand shorter version ! what = while(ASCII.letter, at_least: 1) ! greetings = what |> preceded_by(lit("Hello") |> followed_by(lex(","))) |> followed_by(maybe(lex("!"))) ! ! parse("Hello, BEAM!", greetings) |> IO.inspect # {:ok, ["BEAM"]} ! parse("Hello, BEAM", greetings) |> IO.inspect # {:ok, ["BEAM"]}
  • 27.
    # Parse asequence of numbers separated by `+` or `-` ! number = while(ASCII.digit, at_least: 1) operator = one_of([lex("+"), lex("-")]) ! expression = number |> separated_by(operator) ! parse("1", expression) |> IO.inspect # {:ok, ["1"]} ! parse("1 + 2", expression) |> IO.inspect # {:ok, ["1", "2"]} ! parse("1 + 2 - 3", expression) |> IO.inspect # {:ok, ["1", "2", "3"]} ! # Small problem... to compute the value we need the operators!
  • 28.
    # Parse asequence of numbers separated by `+` or `-` ! number = while(ASCII.digit, at_least: 1) operator = one_of([lex("+"), lex("-")]) ! expression = number |> separated_by(keep(operator)) ! parse("1", expression) |> IO.inspect # {:ok, ["1"]} ! parse("1 + 2", expression) |> IO.inspect # {:ok, ["1", "+", "2"]} ! parse("1 + 2 - 3", expression) |> IO.inspect # {:ok, ["1", "+", "2", "-", "3"]} ! # Ok, but we need numbers not strings
  • 29.
    # In modulePaco.Parser... parser bind(p, f) do fn state, _ -> case p.parse.(state, p) do %Success{result: result} = success -> case f.(result, success) do %Failure{} = failure -> failure %Success{} = success -> success result -> %Success{success|result: result} end %Failure{} = failure -> failure end end end
  • 30.
    # Parse asequence of numbers separated by `+` or `-` ! number = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) ! operator = one_of([lex("+"), lex("-")]) ! expression = number |> separated_by(keep(operator)) ! parse("1 + 2 - 3", expression) |> IO.inspect # {:ok, [1, "+", 2, "-", 3]} ! # Missing only the last step... compute the result :-)
  • 31.
    # Parse asequence of numbers separated by `+` or `-` ! number = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) ! operator = one_of([lex("+"), lex("-")]) ! expression = number |> separated_by(keep(operator)) |> bind(&Paco.Transform.separated_by(&1, fn("+", n, m) -> n + m ("-", n, m) -> n - m end)) ! ! parse("1 + 2 - 3", expression) |> IO.inspect # {:ok, 0]}
  • 32.
    # Parse a{n}b{n}c{n}where n ∈ ℕ ! # If you knew the `n` (ex. 3) it would be easy ! p = sequence_of([while("a", 3), while("b", 3), while("c", 3)]) ! parse("aaabbbccc", p) |> IO.inspect # {:ok, ["aaa", "bbb", "ccc"]} ! # We need to be able to peek ahead and then create a parser # with that knowledge
  • 33.
    # In modulePaco.Parser... ! parser peek(box(p)) do fn %State{at: at, text: text} = state, _ -> case p.parse.(state, p) do %Success{result: result} -> %Success{from: at, to: at, at: at, tail: text, result: result} %Failure{} = failure -> failure end end end
  • 34.
    # In modulePaco.Parser... ! parser then(p, f) when is_function(f), as: bind(p, f) |> bind(fn(p, _, s) -> p.parse.(s, p) end)
  • 35.
    # Parse a{n}b{n}c{n}where n ∈ ℕ ! p = peek(while("a")) |> then(fn(a) -> len = String.length(a) sequence_of([while("a", len), while("b", len), while("c", len)]) end) ! parse("aaabbbccc", p) |> IO.inspect # {:ok, ["aaa", "bbb", "ccc"]} ! parse("aaabbccc", p) |> IO.inspect # {:error, "expected exactly 3 characters in alphabet "b" at 1:4 but got "bbc""}
  • 36.
    # An `element`is a word beginning with one uppercase letter # followed by zero or more lowercase letters element = sequence_of([while(ASCII.upper, 1), while(ASCII.lower)]) ! # A `quantity` is a number greater than zero # If the quantity is omitted assume the value of 1 as default quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) ! # A `reference` is an element optionally followed by a quantity reference = sequence_of([element, quantity]) ! formula = repeat(reference, at_least: 1)
  • 37.
    parse("H2O", formula) |>IO.inspect # {:ok, [[["H", ""], 2], [["O", ""], 1]]} ! # That's right but the output format sucks! ! # What we really want is something like # {:ok, [%{element: "H", quantity: 2}, %{element: "0", quantity: 1}] ! # Is that possible???
  • 38.
    defprotocol Paco.Parsable do @moduledoc""" A protocol that converts terms into Paco parsers """ @fallback_to_any true @doc """ Returns a parser that parses `t` and keeps the shape of `t` """ @spec to_parser(t) :: Paco.Parser.t def to_parser(t) end
  • 39.
    defimpl Paco.Parsable, for:BitString do import Paco.Parser def to_parser(s) when is_binary(s) do lit(s) end def to_parser(s) do raise Protocol.UndefinedError, protocol: @protocol, value: s end end iex(1)> "aaa" |> parse(lit("aaa")) {:ok, "aaa"} iex(2)> "aaa" |> parse("aaa") {:ok, "aaa"}
  • 40.
    defimpl Paco.Parsable, for:List do import Paco.Parser def to_parser(l) do sequence_of(l) end end iex(1)> "ab" |> parse(sequence_of([lit("a"), lit("b")])) {:ok, ["a", "b"]} iex(2)> "ab" |> parse(sequence_of(["a", "b"])) {:ok, ["a", "b"]} iex(3)> "ab" |> parse(["a", "b"]) {:ok, ["a", "b"]}
  • 41.
    defimpl Paco.Parsable, for:Tuple do import Paco.Parser def to_parser(tuple) do sequence_of(Tuple.to_list(tuple)) |> bind(&List.to_tuple/1) end end iex(1)> "ab" |> parse({"a", "b"})) {:ok, {"a", "b"}}
  • 42.
    defimpl Paco.Parsable, for:Map do import Paco.Parser def to_parser(tuple) do {keys, values} = {Map.keys(map), Map.values(map)} sequence_of(values) |> bind(&(Enum.zip(keys, &1) |> Enum.into(Map.new))) end end iex(1)> "ab" |> parse(%{first: "a", last: "b"})) {:ok, %{first: "a", last: "b"}}
  • 43.
    element = [while(ASCII.upper,1), while(ASCII.lower)] ! quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) ! reference = %{element: element, quantity: quantity} ! formula = repeat(reference, at_least: 1) ! parse("H2O", formula) |> IO.inspect # {:ok, [%{element: ["H", ""], quantity: 2}, # %{element: ["O", ""], quantity: 1}]} ! # Almost...
  • 44.
    # parser join(p,joiner ""), # as: bind(p, &Enum.join(&1, joiner)) ! element = [while(ASCII.upper, 1), while(ASCII.lower)] |> join ! quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) ! reference = %{element: element, quantity: quantity} ! formula = repeat(reference, at_least: 1) ! parse("H2O", formula) |> IO.inspect # {:ok, [%{element: "H", quantity: 2}, # %{element: "O", quantity: 1}]} ! # Yahoooo!!!
  • 45.
    element = [while(ASCII.upper,1), while(ASCII.lower)] |> join ! # Bub a `quantity` is a number greater than zero! quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) ! reference = %{element: element, quantity: quantity} ! formula = repeat(reference, at_least: 1) ! parse("Na0", formula) |> IO.inspect # {:ok, [%{element: "Na", quantity: 0}]} ! # Ouch...
  • 46.
    # ... # A`quantity` is a number greater than zero quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) |> only_if(&(&1 > 0)) ! # ... ! parse("Na0", formula) |> IO.inspect # {:error, "0 is not acceptable at 1:3"}
  • 47.
    # ... # A`quantity` is a number greater than zero ! error_message = "quantity must be greather than 0 %AT%" ! greater_than_zero = &{&1 > 0, error_message} ! quantity = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) |> maybe(default: 1) |> only_if(greater_than_zero)) ! # ... ! parse("Na0", formula) |> IO.inspect # {:error, "quantity must be greather than 0 at 1:3"}
  • 48.
    # Parse somethinglike `(1, (2, 3))` ! number = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) ! # We need to name something that is not yet defined, # actually we need to name something in its definition ! list = one_of([number, ???]) |> separated_by(ASCII.comma) |> surrounded_by(ASCII.round_brackets)
  • 49.
    # In modulePaco.Parser... ! parser recursive(f) do fn state, this -> box(f.(this)).parse.(state, this) end end
  • 50.
    # Parse somethinglike `(1, (2, 3))` ! number = while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) ! list = recursive(fn(list) -> one_of([number, list]) |> separated_by(ASCII.comma) |> surrounded_by(ASCII.round_brackets) end) ! parse("(1, 2)", list) |> IO.inspect # {:ok, [1, 2]} ! parse("(1, (2, 3))", list) |> IO.inspect # {:ok, [1, [2, 3]]}
  • 51.
    defmodule ListOfLists do usePaco alias Paco.ASCII ! parser number do while(ASCII.digit, at_least: 1) |> bind(&String.to_integer/1) end ! parser list do one_of([number, list]) |> separated_by(ASCII.comma) |> surrounded_by(ASCII.round_brackets) end end ! Paco.parse("1", ListOfLists.number) |> IO.inspect # {:ok, 1}
  • 52.
    # In modulePaco... ! defmacro __using__(_) do quote do import Paco.Macro.ParserModuleDefinition import Paco.Parser ! Module.register_attribute(__MODULE__, :paco_parsers, accumulate: true) ! @before_compile Paco end end
  • 53.
    # In modulePaco... ! defmacro __before_compile__(env) do root_parser = pick_root_parser_between( Module.get_attribute(env.module, :paco_parsers) |> Enum.reverse ) ! quote do def parse(s, opts []) do Paco.parse(s, apply(__MODULE__, unquote(root_parser), []), opts) end ! def parse!(s, opts []) do Paco.parse!(s, apply(__MODULE__, unquote(root_parser), []), opts) end end end
  • 54.
    # Everything wesaw until now works with streams of text! ! ["a", "b", "", "ab", "", "a", "", "", "b", "", ""] |> Paco.Stream.parse(lit("ab")) |> Enum.to_list |> IO.inspect # ["ab", "ab", "ab"] ! [~s|{"foo|, ~s|": "bar"|, ~s|}[1, 2|, ~s|, 3]|] |> Paco.Parser.JSON.stream |> Enum.to_list |> IO.inspect # [%{"foo" => "bar"}, [1, 2, 3]]
  • 55.
    parser lit(s) do fn%State{at: from, text: text, stream: stream} = state, this -> case Paco.String.consume(text, s, from) do {tail, _, to, at} -> %Success{from: from, to: to, at: at, tail: tail, result: s} ! {:not_enough, _, _, _, _} when is_pid(stream) -> wait_for_more_and_continue(state, this)
 {_, _, _, _, {n, _, _}} -> %Failure{at: from, tail: text, expected: s, rank: n+1} end end end !
  • 56.
    defp wait_for_more_and_continue(state, this)do %State{text: text, stream: stream} = state send(stream, {self, :more}) receive do {:load, more_text} -> this.parse.(%State{state|text: text <> more_text}, this) :halted -> # The stream is over, switching to a non stream mode # is the same as to tell the parser to behave knowing # that more input will never come this.parse.(%State{state|stream: nil}, this) end end
  • 58.
    defmodule Paco.Parser.JSON do aliasPaco.ASCII use Paco ! root parser all, do: one_of([object, array]) ! parser object do pair(string, value, separated_by: ASCII.colon) |> separated_by(ASCII.comma) |> surrounded_by(ASCII.curly_brackets) |> bind(&to_map/1) end ! parser array do value |> separated_by(ASCII.comma) |> surrounded_by(ASCII.square_brackets) end # ...
  • 59.
    defmodule Paco.Parser.JSON do #... ! parser value do one_of([ string, number, object, array, literal_true, literal_false, literal_null]) end ! parser string do between(ASCII.double_quotes, escaped_with: "", strip: false) |> bind(&replace_escapes_in_string/1) end # ...
  • 60.
    defmodule Paco.Parser.JSON do #... ! parser literal_true, do: lit("true") |> replace_with(true) parser literal_false, do: lit("false") |> replace_with(false) parser literal_null, do: lit("null") |> replace_with(nil) ! # ... end
  • 61.
    Settings: duration: 1.0 s ! ##Paco.Benchmark.JSON [00:21:14] 1/4: poison small [00:21:16] 2/4: poison medium [00:21:18] 3/4: paco small [00:21:21] 4/4: paco medium ! Finished in 8.78 seconds ! ## Paco.Benchmark.JSON poison small 100000 14.72 µs/op poison medium 10000 144.58 µs/op paco small 5000 493.32 µs/op paco medium 500 4152.14 µs/op
  • 62.