From 5384d7535105a5a05ac387d2d9b52f70bc7aae7a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 22 Aug 2017 09:45:22 +0100 Subject: [PATCH] [functional-tests] Monadic parser combinators --- functional-tests/parser-combinators.scm | 348 ++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 functional-tests/parser-combinators.scm diff --git a/functional-tests/parser-combinators.scm b/functional-tests/parser-combinators.scm new file mode 100644 index 0000000..8dfb10f --- /dev/null +++ b/functional-tests/parser-combinators.scm @@ -0,0 +1,348 @@ +(library + (parser-combinators) + + (export parse + new-state + get-state + update-state + parse-value + error-m + success? + pure + >>= + alt + peek + >> + lift + lift2 + seq + one-of + opt + opt-default + many* + many+ + if-m + when-m + unless-m + <* + times + upto + many-range + surround + getchar + getchars + accept-char + charset + neg-charset + lit + eof + parse-m + <-) + + (import (rnrs) + (fmt fmt) + (srfi s8 receive)) + + ;;-------------------------------- + ;; Hand rolled state + parser monad + ;; + ;; The state must be immutable for this to work. So we can't use hash + ;; tables etc. + + ;;-------------------------------- + ;; Coordinates + (define-record-type coordinate + (fields (immutable line coord-line) + (immutable char coord-char))) + + (define (new-coordinate) + (make-coordinate 1 0)) + + (define (inc-line c n) + (make-coordinate (+ n (coord-line c)) 1)) + + (define (inc-char c n) + (make-coordinate (coord-line c) (+ n (coord-char c)))) + + (define (coord-consume c str) + (let loop ((newl (coord-line c)) + (newc (coord-char c)) + (input (string->list str))) + (if (null? input) + (make-coordinate newl newc) + (let ((ch (car input))) + (if (eq? #\newline ch) + (loop (+ 1 newl) 0 (cdr input)) + (loop newl (+ 1 newc) (cdr input))))))) + + ;;-------------------------------- + ;; Parse context + (define-record-type parse-state + (fields (immutable success-or-error st-soe) + (immutable coord st-coord) + (immutable input st-input))) + + (define (new-state input) + (make-parse-state #t (new-coordinate) input)) + + (define (st-update-soe st soe) + (make-parse-state soe (st-coord st) (st-input st))) + + (define (st-update-coord st c) + (make-parse-state (st-soe st) c (st-input st))) + + (define (st-update-input st in) + (make-parse-state (st-soe st) (st-coord st) in)) + + (define (st-consume st cs) + (make-parse-state (st-soe st) + (st-coord st) + (let ((old (st-input st))) + (substring old (string-length cs) (string-length old))))) + + ;;-------------------------------- + ;; A vanilla state monad, carrying around a parse-state. + ;; + ;; st -> v, st + + ;; m a -> string -> v, state + (define (parse ma input) + (ma (new-state input))) + + ;; m st + (define (get-state) + (lambda (st) + (values st st))) + + ;; (st -> st) -> m () + (define (update-state fn) + (lambda (st) + (values '() (fn st)))) + + (define (parse-value ma input) + (receive (v st) (parse ma input) + v)) + + (define (error-m fmt-doc) + (update-state + (lambda (st) + (st-update-soe st (fmt #f fmt-doc))))) + + (define (success? st) + (eq? (st-soe st) #t)) + + ;; a -> m a + (define (pure v) + (lambda (st) + (values v st))) + + ;; m a -> (a -> m b) -> m b + (define (>>= ma fn) + (lambda (st) + (receive (v st2) (ma st) + (if (success? st2) + ((fn v) st2) + (values v st2))))) + + ;; m a -> m a -> m a + (define (alt ma1 ma2) + (lambda (st) + (receive (v st2) (ma1 st) + (if (success? st2) + (values v st2) + (ma2 st))))) + + ;; m a -> m a (but doesn't modify state) + (define (peek ma) + (lambda (st) + (receive (v st2) (ma st) + (values v (st-update-soe st (st-soe st2)))))) + + ;;-------------------------------- + ;; General monad combinators + ;; + ;; These only use fail_, pure, >>=, alt + + ;; m a -> m b -> m b + (define (>> ma mb) + (>>= ma (lambda (v) mb))) + + ;; (a -> b) -> m a -> m b + (define (lift fn ma) + (>>= ma (lambda (a) + (pure (fn a))))) + + ;; (a -> b -> c) -> m a -> m b -> m c + (define (lift2 fn ma mb) + (>>= ma (lambda (a) + (>>= mb (lambda (b) + (pure (fn a b))))))) + + ;; [m a] -> m [a] + (define (seq . ms) + (let loop ((ms ms)) + (if (null? ms) + (pure '()) + (lift2 cons (car ms) (loop (cdr ms)))))) + + ;; m a -> m a -> m a + (define (one-of . xs) + (let loop ((xs xs)) + (if (null? xs) + (error-m (dsp "one-of found no match")) + (alt (car xs) (loop (cdr xs)))))) + + ;; m a -> m [a] + (define (opt ma) + (alt (lift list ma) + (pure '()))) + + ;; m a -> m b -> m (a|b) + (define (opt-default ma default) + (alt ma (pure default))) + + (define (mk-conser v) + (lambda (vs) (cons v vs))) + + ;; m a -> m [a] + (define (many* ma) + (alt (>>= ma + (lambda (v) + (lift (mk-conser v) + (many* ma)))) + (pure '()))) + + ;; FIXME: why doesn't this work? (blows stack) + ;; (defun many* (ma) + ;; (alt (lift2 #'cons ma (many* ma)) + ;; (pure nil))) + + (define (many+ ma) + (lift2 cons ma (many* ma))) + + ;; Bool -> m a -> m b -> Either (m a) (m b) + (define (if-m p ma mb) + (if p ma mb)) + + ;; Bool -> m a -> m a + (define (when-m p ma) + (if-m p ma (error-m (dsp "when-m failed")))) + + (define (unless-m p ma) + (if-m p (error-m (dsp "unless-m failed")) ma)) + + ;; m a -> m b -> m a + (define (<* ma mb) + (>>= ma (lambda (a) + (>> mb (pure a))))) + + ;;-------------------------------- + ;; Combinators that use parse-m + + (define (times n ma) + (if (zero? n) + (pure '()) + (lift2 cons ma (times (- n 1) ma)))) + + (define (upto max ma) + (if (zero? max) + (pure '()) + (>>= (opt ma) + (lambda (a) + (if (null? a) + (pure '()) + (lift (mk-conser (car a)) + (upto (- max 1) ma))))))) + + (define (many-range min max ma) + (>>= (times min ma) + (lambda (vs) + (>>= (upto (- max min) ma) + (lambda (os) + (pure (append vs os))))))) + + ;; ma -> mb -> mb + (define (surround ma mb) + (>> ma (<* mb ma))) + + ;;-------------------------------- + ;; Basic combinators + ;; + ;; We should try and keep these to a minimum. These are allowed to + ;; know the internals of the monad. + (define getchar + (lambda (st) + (let ((input (st-input st))) + (if (zero? (string-length input)) + (values #f (st-update-soe st "end of input for getchar")) + (let ((c (string-ref input 0))) + (values c (st-consume st (list->string (list c))))))))) + + (define (getchars n) + (lambda (st) + (let ((input (st-input st))) + (if (>= (string-length input) n) + (let ((cs (substring input 0 n))) + (values cs (st-consume st cs))) + (values '() (st-update-soe st "insufficient input for getchars")))))) + + ;;-------------------------------- + ;; Higher order combinators + ;; + ;; These should just use other combinators + + (define (accept-char pred) + (>>= getchar + (lambda (c) + (when-m (pred c) + (pure c))))) + + ;; FIXME: slow + (define (charset str) + (let ((cs (string->list str))) + (accept-char (lambda (c) + (member c cs))))) + + (define (neg-charset str) + (let ((cs (string->list str))) + (accept-char (lambda (c) + (not (member c cs)))))) + + (define (lit tok) + (let ((len (string-length tok))) + (>>= (getchars len) + (lambda (str) + (when-m (string=? tok str) + (pure tok)))))) + + #| + (defun token (str &optional sym) + (unless sym + (setf sym (symb-keyword str))) + (>> (lit str) (pure sym))) + |# + + (define (eof s) + (if (zero? (string-length s)) + (values #t #t "") + (error-m (dsp "eof expected no input (but there was)")))) + + ;;---------------------------------------------------------------- + ;; Imperative notation for when the combinators are cumbersome + + (define-syntax parse-m + (syntax-rules (<-) + ((_ (<- v ma) clauses ...) + (>>= ma + (lambda (v) + (parse-m clauses ...)))) + + ((_ ma) ma) + + ((_ ma clauses ...) + (>> ma (parse-m clauses ...))))) + + (define-syntax <- + (lambda (x) + (syntax-violation '<- "misplaced auxilliary keyword" x))) + )