350 lines
8.4 KiB
Scheme
350 lines
8.4 KiB
Scheme
(library
|
|
(parser-combinators)
|
|
|
|
(export parse
|
|
new-state
|
|
get-state
|
|
update-state
|
|
parse-value
|
|
error-m
|
|
success?
|
|
pure
|
|
>>=
|
|
alt
|
|
peek
|
|
>>
|
|
lift
|
|
lift2
|
|
seq
|
|
one-of
|
|
opt
|
|
opt-default
|
|
many*
|
|
many+
|
|
if-m
|
|
when-m
|
|
unless-m
|
|
<*
|
|
times
|
|
upto
|
|
many-range
|
|
surround
|
|
getchar
|
|
getchars
|
|
accept-char
|
|
charset
|
|
neg-charset
|
|
lit
|
|
eof
|
|
parse-m
|
|
<-)
|
|
|
|
(import (rnrs)
|
|
(fmt fmt)
|
|
(srfi s8 receive))
|
|
|
|
;;--------------------------------
|
|
;; Hand rolled state + parser monad
|
|
;;
|
|
;; The state must be immutable for this to work. So we can't use hash
|
|
;; tables etc.
|
|
|
|
;;--------------------------------
|
|
;; Coordinates
|
|
(define-record-type coordinate
|
|
(fields (immutable line coord-line)
|
|
(immutable char coord-char)))
|
|
|
|
(define (new-coordinate)
|
|
(make-coordinate 1 0))
|
|
|
|
(define (inc-line c n)
|
|
(make-coordinate (+ n (coord-line c)) 1))
|
|
|
|
(define (inc-char c n)
|
|
(make-coordinate (coord-line c) (+ n (coord-char c))))
|
|
|
|
(define (coord-consume c str)
|
|
(let loop ((newl (coord-line c))
|
|
(newc (coord-char c))
|
|
(input (string->list str)))
|
|
(if (null? input)
|
|
(make-coordinate newl newc)
|
|
(let ((ch (car input)))
|
|
(if (eq? #\newline ch)
|
|
(loop (+ 1 newl) 0 (cdr input))
|
|
(loop newl (+ 1 newc) (cdr input)))))))
|
|
|
|
;;--------------------------------
|
|
;; Parse context
|
|
(define-record-type parse-state
|
|
(fields (immutable success-or-error st-soe)
|
|
(immutable coord st-coord)
|
|
(immutable input st-input)))
|
|
|
|
(define (new-state input)
|
|
(make-parse-state #t (new-coordinate) input))
|
|
|
|
(define (st-update-soe st soe)
|
|
(make-parse-state soe (st-coord st) (st-input st)))
|
|
|
|
(define (st-update-coord st c)
|
|
(make-parse-state (st-soe st) c (st-input st)))
|
|
|
|
(define (st-update-input st in)
|
|
(make-parse-state (st-soe st) (st-coord st) in))
|
|
|
|
(define (st-consume st cs)
|
|
(make-parse-state (st-soe st)
|
|
(st-coord st)
|
|
(let ((old (st-input st)))
|
|
(substring old (string-length cs) (string-length old)))))
|
|
|
|
;;--------------------------------
|
|
;; A vanilla state monad, carrying around a parse-state.
|
|
;;
|
|
;; st -> v, st
|
|
|
|
;; m a -> string -> v, state
|
|
(define (parse ma input)
|
|
(ma (new-state input)))
|
|
|
|
;; m st
|
|
(define (get-state)
|
|
(lambda (st)
|
|
(values st st)))
|
|
|
|
;; (st -> st) -> m ()
|
|
(define (update-state fn)
|
|
(lambda (st)
|
|
(values '() (fn st))))
|
|
|
|
(define (parse-value ma input)
|
|
(receive (v st) (parse ma input)
|
|
v))
|
|
|
|
(define (error-m fmt-doc)
|
|
(update-state
|
|
(lambda (st)
|
|
(st-update-soe st (fmt #f fmt-doc)))))
|
|
|
|
(define (success? st)
|
|
(eq? (st-soe st) #t))
|
|
|
|
;; a -> m a
|
|
(define (pure v)
|
|
(lambda (st)
|
|
(values v st)))
|
|
|
|
;; m a -> (a -> m b) -> m b
|
|
(define (>>= ma fn)
|
|
(lambda (st)
|
|
(receive (v st2) (ma st)
|
|
(if (success? st2)
|
|
((fn v) st2)
|
|
(values v st2)))))
|
|
|
|
;; m a -> m a -> m a
|
|
(define (alt ma1 ma2)
|
|
(lambda (st)
|
|
(receive (v st2) (ma1 st)
|
|
(if (success? st2)
|
|
(values v st2)
|
|
(ma2 st)))))
|
|
|
|
;; m a -> m a (but doesn't modify state)
|
|
(define (peek ma)
|
|
(lambda (st)
|
|
(receive (v st2) (ma st)
|
|
(values v (st-update-soe st (st-soe st2))))))
|
|
|
|
;;--------------------------------
|
|
;; General monad combinators
|
|
;;
|
|
;; These only use fail_, pure, >>=, alt
|
|
|
|
;; m a -> m b -> m b
|
|
(define (>> ma mb)
|
|
(>>= ma (lambda (v) mb)))
|
|
|
|
;; (a -> b) -> m a -> m b
|
|
(define (lift fn ma)
|
|
(>>= ma (lambda (a)
|
|
(pure (fn a)))))
|
|
|
|
;; (a -> b -> c) -> m a -> m b -> m c
|
|
(define (lift2 fn ma mb)
|
|
(>>= ma (lambda (a)
|
|
(>>= mb (lambda (b)
|
|
(pure (fn a b)))))))
|
|
|
|
;; [m a] -> m [a]
|
|
(define (seq . ms)
|
|
(let loop ((ms ms))
|
|
(if (null? ms)
|
|
(pure '())
|
|
(lift2 cons (car ms) (loop (cdr ms))))))
|
|
|
|
;; m a -> m a -> m a
|
|
(define (one-of . xs)
|
|
(let loop ((xs xs))
|
|
(if (null? xs)
|
|
(error-m (dsp "one-of found no match"))
|
|
(alt (car xs) (loop (cdr xs))))))
|
|
|
|
;; m a -> m [a]
|
|
(define (opt ma)
|
|
(alt (lift list ma)
|
|
(pure '())))
|
|
|
|
;; m a -> m b -> m (a|b)
|
|
(define (opt-default ma default)
|
|
(alt ma (pure default)))
|
|
|
|
(define (mk-conser v)
|
|
(lambda (vs) (cons v vs)))
|
|
|
|
;; m a -> m [a]
|
|
(define (many* ma)
|
|
(alt (>>= ma
|
|
(lambda (v)
|
|
(lift (mk-conser v)
|
|
(many* ma))))
|
|
(pure '())))
|
|
|
|
;; FIXME: why doesn't this work? (blows stack)
|
|
;; (defun many* (ma)
|
|
;; (alt (lift2 #'cons ma (many* ma))
|
|
;; (pure nil)))
|
|
|
|
(define (many+ ma)
|
|
(lift2 cons ma (many* ma)))
|
|
|
|
;; Bool -> m a -> m b -> Either (m a) (m b)
|
|
(define (if-m p ma mb)
|
|
(if p ma mb))
|
|
|
|
;; Bool -> m a -> m a
|
|
(define (when-m p ma)
|
|
(if-m p ma (error-m (dsp "when-m failed"))))
|
|
|
|
(define (unless-m p ma)
|
|
(if-m p (error-m (dsp "unless-m failed")) ma))
|
|
|
|
;; m a -> m b -> m a
|
|
(define (<* ma mb)
|
|
(>>= ma (lambda (a)
|
|
(>> mb (pure a)))))
|
|
|
|
;;--------------------------------
|
|
;; Combinators that use parse-m
|
|
|
|
(define (times n ma)
|
|
(if (zero? n)
|
|
(pure '())
|
|
(lift2 cons ma (times (- n 1) ma))))
|
|
|
|
(define (upto max ma)
|
|
(if (zero? max)
|
|
(pure '())
|
|
(>>= (opt ma)
|
|
(lambda (a)
|
|
(if (null? a)
|
|
(pure '())
|
|
(lift (mk-conser (car a))
|
|
(upto (- max 1) ma)))))))
|
|
|
|
(define (many-range min max ma)
|
|
(>>= (times min ma)
|
|
(lambda (vs)
|
|
(>>= (upto (- max min) ma)
|
|
(lambda (os)
|
|
(pure (append vs os)))))))
|
|
|
|
;; ma -> mb -> mb
|
|
(define (surround ma mb)
|
|
(>> ma (<* mb ma)))
|
|
|
|
;;--------------------------------
|
|
;; Basic combinators
|
|
;;
|
|
;; We should try and keep these to a minimum. These are allowed to
|
|
;; know the internals of the monad.
|
|
(define getchar
|
|
(lambda (st)
|
|
(let ((input (st-input st)))
|
|
(if (zero? (string-length input))
|
|
(values #f (st-update-soe st "end of input for getchar"))
|
|
(let ((c (string-ref input 0)))
|
|
(values c (st-consume st (list->string (list c)))))))))
|
|
|
|
(define (getchars n)
|
|
(lambda (st)
|
|
(let ((input (st-input st)))
|
|
(if (>= (string-length input) n)
|
|
(let ((cs (substring input 0 n)))
|
|
(values cs (st-consume st cs)))
|
|
(values '() (st-update-soe st "insufficient input for getchars"))))))
|
|
|
|
;;--------------------------------
|
|
;; Higher order combinators
|
|
;;
|
|
;; These should just use other combinators
|
|
|
|
(define (accept-char pred)
|
|
(>>= getchar
|
|
(lambda (c)
|
|
(when-m (pred c)
|
|
(pure c)))))
|
|
|
|
;; FIXME: slow
|
|
(define (charset str)
|
|
(let ((cs (string->list str)))
|
|
(accept-char (lambda (c)
|
|
(member c cs)))))
|
|
|
|
(define (neg-charset str)
|
|
(let ((cs (string->list str)))
|
|
(accept-char (lambda (c)
|
|
(not (member c cs))))))
|
|
|
|
(define (lit tok)
|
|
(let ((len (string-length tok)))
|
|
(>>= (getchars len)
|
|
(lambda (str)
|
|
(when-m (string=? tok str)
|
|
(pure tok))))))
|
|
|
|
#|
|
|
(defun token (str &optional sym)
|
|
(unless sym
|
|
(setf sym (symb-keyword str)))
|
|
(>> (lit str) (pure sym)))
|
|
|#
|
|
|
|
(define eof
|
|
(lambda (st)
|
|
(if (zero? (string-length (st-input st)))
|
|
(values #t st)
|
|
(error-m (dsp "eof expected no input (but there was)")))))
|
|
|
|
;;----------------------------------------------------------------
|
|
;; Imperative notation for when the combinators are cumbersome
|
|
|
|
(define-syntax parse-m
|
|
(syntax-rules (<-)
|
|
((_ (<- v ma) clauses ...)
|
|
(>>= ma
|
|
(lambda (v)
|
|
(parse-m clauses ...))))
|
|
|
|
((_ ma) ma)
|
|
|
|
((_ ma clauses ...)
|
|
(>> ma (parse-m clauses ...)))))
|
|
|
|
(define-syntax <-
|
|
(lambda (x)
|
|
(syntax-violation '<- "misplaced auxilliary keyword" x)))
|
|
)
|