thin-provisioning-tools/functional-tests/regex.scm

(library
  (regex)
  (export regex)
  (import (chezscheme)
          (fmt fmt)
          (loops)
          (prefix (parser-combinators) p:)
          (srfi s8 receive)
          (matchable)
          (utils))

  ;; Simple regex library, because it's friday and I'm bored.
  ;; Playing with the ideas in: https://swtch.com/~rsc/regexp/regexp2.html
  ;; which reminded me of reading through the source code to Sam in '93.

  ;; Rather than parsing a string we'll use expressions.
  ;; (lit <string>)
  ;; (seq rx1 rx2)
  ;; (alt rx1 rx2)
  ;; (opt rx)
  ;; (star rx)
  ;; (plus rx)
  ;;
  ;; The expressions get compiled into a vector of vm instructions.
  ;; (char fn)   ; where fn :: char -> bool
  ;; (match)
  ;; (jmp x)
  ;; (split x y)

  ;; (append-instr code instr ...) -> instruction list
  ;; Returns CODE followed by every extra argument, in the order given.
  ;; Built with `append`, so CODE itself is not modified.
  (define (append-instr code . instrs)
    (append code instrs))
  ;; Builds the pseudo-instruction (label L) that marks a jump target.
  (define (label-instr l)
    (list 'label l))
  ;; Builds the instruction (jmp L): continue execution at target L.
  (define (jmp-instr l)
    (list 'jmp l))
  ;; Builds the instruction (char FN), where FN is a one-argument predicate
  ;; applied to the current input character.
  (define (char-instr fn)
    (list 'char fn))
  ;; Builds the instruction (split L1 L2): fork execution to both targets.
  (define (split-instr l1 l2)
    (list 'split l1 l2))
  ;; Returns the terminal instruction (match).
  (define match-instr
    (lambda ()
      '(match)))
  ;; True iff INSTR is structurally the one-element list (match).
  (define (match-instr? instr)
    (and (pair? instr)
         (eq? 'match (car instr))
         (null? (cdr instr))))

  ;; Prefixes CODE with a (label LABEL) marker so other instructions can
  ;; target its first instruction.
  (define (label-code label code)
    (let ((marker (label-instr label)))
      (cons marker code)))

  ;; Compiles to a list of labelled instructions that can later be flattened
  ;; into a linear sequence.
  ;; (lit str) -> instruction list
  ;; One `char` instruction per character of STR, in order.  Each predicate
  ;; accepts exactly that character (compared with char=?).
  (define (lit str)
    (define (exactly wanted)
      (char-instr
        (lambda (got)
          (char=? wanted got))))
    (map exactly (string->list str)))

  ;; (seq rx1 rx2) -> instruction list
  ;; Concatenation: the instructions of RX1 followed by those of RX2.
  (define (seq rx1 rx2)
    (fold-right cons rx2 rx1))

  ;; (alt rx1 rx2) -> instruction list
  ;; Alternation.  Emitted layout (labels are fresh gensyms):
  ;;         split left right
  ;;   left:  <rx1>
  ;;         jmp end
  ;;   right: <rx2>
  ;;   end:
  (define (alt rx1 rx2)
    (let* ((left (gensym))
           (right (gensym))
           (end (gensym)))
      `(,(split-instr left right)
        ,(label-instr left)
        ,@rx1
        ,(jmp-instr end)
        ,(label-instr right)
        ,@rx2
        ,(label-instr end))))

  ;; (opt rx) -> instruction list
  ;; Zero or one occurrence.  Emitted layout (labels are fresh gensyms):
  ;;         split body end
  ;;   body: <rx>
  ;;   end:
  (define (opt rx)
    (let* ((body (gensym))
           (end (gensym)))
      `(,(split-instr body end)
        ,(label-instr body)
        ,@rx
        ,(label-instr end))))

  ;; (star rx) -> instruction list
  ;; Zero or more occurrences.  Emitted layout (labels are fresh gensyms):
  ;;   top:  split loop end
  ;;   loop: <rx>
  ;;         jmp top
  ;;   end:
  (define (star rx)
    (let* ((top (gensym))
           (loop-start (gensym))
           (end (gensym)))
      `(,(label-instr top)
        ,(split-instr loop-start end)
        ,(label-instr loop-start)
        ,@rx
        ,(jmp-instr top)
        ,(label-instr end))))

  ;; (plus rx) -> instruction list
  ;; One or more occurrences.  Emitted layout (labels are fresh gensyms):
  ;;   top: <rx>
  ;;        split top end
  ;;   end:
  (define (plus rx)
    (let* ((top (gensym))
           (end (gensym)))
      `(,(label-instr top)
        ,@rx
        ,(split-instr top end)
        ,(label-instr end))))

  ;; (label-locations code) -> eq-hashtable mapping label -> pc
  ;; Walks the labelled instruction list once.  A (label L) entry records the
  ;; current pc for L without advancing it; every other instruction advances
  ;; the pc by one.  A label therefore maps to the index of the instruction
  ;; that follows it once labels have been stripped.
  (define (label-locations code)
    (let ((locs (make-eq-hashtable))
          (pc 0))
      (for-each
        (lambda (entry)
          (match entry
            (('label l)
             (hashtable-set! locs l pc))
            (other
              (set! pc (+ 1 pc)))))
        code)
      locs))

  ;; (remove-labels code locs) -> instruction list
  ;; Flattens labelled code: (label L) entries are dropped, and the targets of
  ;; jmp/split are replaced by the pcs recorded in LOCS (#f if a label is not
  ;; present in the table).  All other instructions pass through unchanged and
  ;; the relative order is preserved.
  (define (remove-labels code locs)
    (define (resolve label)
      (hashtable-ref locs label #f))

    ;; Pushes the rewritten form of INSTR (if any) onto the reversed output.
    (define (rewrite acc instr)
      (match instr
        (('label l)
         acc)
        (('jmp l)
         (cons (jmp-instr (resolve l)) acc))
        (('split l1 l2)
         (cons (split-instr (resolve l1) (resolve l2)) acc))
        (other
          (cons other acc))))

    (reverse (fold-left rewrite '() code)))

  ;; (optimise-jumps! code) -> code
  ;; CODE is a vector of label-free instructions; it is rewritten in place and
  ;; returned.  A jmp whose target is (match), or a split with at least one
  ;; target that is (match), is itself replaced by (match).  Passes repeat
  ;; until one completes without changing anything, so chains collapse too.
  (define (optimise-jumps! code)
    (define (match-at? pc)
      (match-instr? (vector-ref code pc)))

    ;; Runs over every instruction once; returns #t if anything was rewritten.
    (define (single-pass)
      (let ((changed #f))
        (define (become-match! n)
          (set! changed #t)
          (vector-set! code n (match-instr)))

        (upto (n (vector-length code))
              (match (vector-ref code n)
                     (('jmp l)
                      (when (match-at? l)
                        (become-match! n)))

                     (('split l1 l2)
                      (when (or (match-at? l1)
                                (match-at? l2))
                        (become-match! n)))

                     ;; anything else is left untouched
                     (_ _)))
        changed))

    (let again ()
      (when (single-pass)
        (again)))
    code)

  ;; (compile-to-symbols rx) -> vector of symbolic instructions
  ;; Appends the final (match), resolves labels to pcs, strips the labels and
  ;; runs the jump optimiser over the resulting vector.
  (define (compile-to-symbols rx)
    (let* ((terminated (append-instr rx (match-instr)))
           (locs (label-locations terminated))
           (flat (remove-labels terminated locs)))
      (optimise-jumps! (list->vector flat))))

  ;; A 'thread' is just an index (pc) into the instruction vector.  A 'yarn'
  ;; holds the current set of threads.  There can never be more threads than
  ;; instructions, so both vectors in a yarn have one slot per instruction.
  ;; Threads are run in lock step, all taking the same input character.
  ;;
  ;;   size  - number of threads currently on the stack
  ;;   stack - vector of pcs; slots [0, size) are live
  ;;   seen  - vector of flags indexed by pc, used to avoid adding a pc twice
  (define-record-type (yarn make-yarn yarn?)
    (fields (mutable size yarn-size yarn-size-set!)
            (mutable stack yarn-stack yarn-stack-set!)
            (mutable seen yarn-seen yarn-seen-set!)))

  ;; Creates an empty yarn with room for COUNT threads: size 0, a COUNT-slot
  ;; stack vector and a COUNT-slot seen vector filled with #f.
  (define (mk-yarn count)
    (let ((stack (make-vector count))
          (seen (make-vector count #f)))
      (make-yarn 0 stack seen)))

  ;; Empties yarn Y in place: size goes back to 0 and every seen flag to #f.
  ;; The stack vector's contents are left as they are.
  (define (clear-yarn! y)
    (yarn-size-set! y 0)
    (let ((seen (yarn-seen y)))
      (vector-fill! seen #f)))

  ;; Pushes thread (pc) I onto yarn Y unless its seen flag is already set;
  ;; pushing sets the flag, so each pc is added at most once between clears.
  (define (add-thread! y i)
    (let ((seen (yarn-seen y))
          (top (yarn-size y)))
      (unless (vector-ref seen i)
        (vector-set! seen i #t)
        (vector-set! (yarn-stack y) top i)
        (yarn-size-set! y (+ top 1)))))

  ;; Pops and returns the most recently pushed thread (pc) of yarn Y, or #f
  ;; when the yarn is empty.  The seen flag of the popped pc stays set.
  (define (pop-thread! y)
    (let ((count (yarn-size y)))
      (and (not (zero? count))
           (let ((top (- count 1)))
             (yarn-size-set! y top)
             (vector-ref (yarn-stack y) top)))))

  ;; True iff yarn Y currently holds no threads.
  (define (no-threads? y)
    (= 0 (yarn-size y)))

  ;; FIXME: hack
  ;; Sentinel character (NUL) that the matcher feeds to one final step once
  ;; the input text has been exhausted.
  (define end-of-string #\nul)

  ;; (compile-rx rx) -> (lambda (txt) ...) returning #t or #f
  ;;
  ;; RX is a labelled instruction list as built by lit/seq/alt/opt/star/plus.
  ;; It is lowered with compile-to-symbols and every instruction is turned
  ;; into a closure, so the matching loop never has to call `match`.
  ;;
  ;; The returned matcher starts a single thread at pc 0 and feeds it TXT one
  ;; character at a time.  It answers #t as soon as any thread reaches the
  ;; (match) instruction, i.e. when some prefix of TXT is accepted, and #f
  ;; once no thread survives.  After the last character one extra step is run
  ;; with the end-of-string sentinel so threads parked on (match) are seen.
  ;;
  ;; Both yarns are allocated once and shared by every call of the matcher,
  ;; which is why they are reset on entry: a previous call may have returned
  ;; early (leaving threads on the stack), or have left seen flags / queued
  ;; threads behind during its final end-of-string step.  Without the reset
  ;; those leftovers made later calls return wrong answers.
  (define (compile-rx rx)
    (let* ((sym-code (compile-to-symbols rx))
           (code-len (vector-length sym-code))
           (threads (mk-yarn code-len))
           (next-threads (mk-yarn code-len)))

      ;; Symbolic instruction -> closure taking (in-c pc).  A closure returns
      ;; the symbol 'match to report success; any other result is ignored.
      ;; The closures refer to the *variables* threads/next-threads, so they
      ;; follow the swap! performed between input characters.
      (define (compile-instr instr)
        (match instr
               (('match)
                (lambda (in-c pc) 'match))

               (('char fn)
                (lambda (in-c pc)
                  ;; in-c is either a character of the text or the
                  ;; end-of-string sentinel; a surviving thread resumes at
                  ;; the next instruction on the next input character.
                  (when (fn in-c)
                    (add-thread! next-threads (+ 1 pc)))))

               (('jmp l)
                (lambda (in-c pc)
                  (add-thread! threads l)))

               (('split l1 l2)
                (lambda (in-c pc)
                  (add-thread! threads l1)
                  (add-thread! threads l2)))))

      ;; Compiled once; the instruction vector never changes afterwards.
      (define code (vector-map compile-instr sym-code))

      ;; Runs every thread in `threads` against IN-C.  Returns 'match as soon
      ;; as one thread reports it, otherwise #f once the yarn is drained.
      (define (step in-c)
        (let loop ((pc (pop-thread! threads)))
         (and pc
              (if (eq? 'match ((vector-ref code pc) in-c pc))
                  'match
                  (loop (pop-thread! threads))))))

      ;(fmt #t (dsp "running ") (pretty code) nl)

      (lambda (txt)
        ;; Start from a clean slate; see the note on shared yarns above.
        (clear-yarn! threads)
        (clear-yarn! next-threads)
        (add-thread! threads 0)
        (let ((txt-len (string-length txt)))
         (let c-loop ((c-index 0))
          (if (< c-index txt-len)
              ;; FIXME: make step return a bool
              (if (eq? 'match (step (string-ref txt c-index)))
                  #t
                  (if (no-threads? next-threads)
                      #f
                      (begin
                        ;; The survivors become the current set; the old
                        ;; current yarn is recycled as the next one.
                        (swap! threads next-threads)
                        (clear-yarn! next-threads)
                        (c-loop (+ 1 c-index)))))
              (eq? 'match (step end-of-string))))))))

  ;;;--------------------------------------------------------
  ;;; Parser

  ;; FIXME: ^ and ? aren't in the grammar, and eos/$ isn't wired up

  ;; raw-char := any char that is not one of \ ^ $ * + ? [ ] ( ) |
  ;;           | "\" followed by any char
  ;; Parser built from p: combinators; the first branch returns the accepted
  ;; character via p:pure.
  ;; NOTE(review): the second branch presumably yields the character after
  ;; the backslash -- confirm the result kept by p:>>.
  (define raw-char
    (let ((meta-chars (string->list "\\^$*+?[]()|")))
     (define (plain? ch)
       (not (memv ch meta-chars)))

     (p:alt (p:parse-m (p:<- ch (p:accept-char plain?))
                       (p:pure ch))
            (p:>> (p:lit "\\")
                  (p:accept-char (lambda (ch) #t))))))

  ;; (bracket before after ma) -> parser
  ;; Composes three parsers in the order BEFORE, MA, AFTER using p:>> and
  ;; p:<*.  Note the argument order: both delimiters come first, the enclosed
  ;; parser last.
  ;; NOTE(review): presumably p:>> keeps the right-hand result and p:<* the
  ;; left-hand one, so the combined parser yields MA's value -- confirm
  ;; against (parser-combinators).
  (define (bracket before after ma)
    (p:>> before (p:<* ma after)))

  ;; (negate fn) -> predicate
  ;; The returned one-argument predicate answers #f whenever FN answers a
  ;; true value for the same argument, and #t otherwise.
  (define (negate fn)
    (lambda (c)
      (if (fn c) #f #t)))

  ;;-----------------------------------------------------------
  ;; Low level char combinators.  These build char predicates.

  ;; char-rx := raw-char
  ;; (i.e. any non metacharacter, or "\" followed by any character)
  ;; Yields a predicate that accepts exactly the parsed character.
  (define char-rx
    (p:parse-m (p:<- wanted raw-char)
               (p:pure (lambda (got)
                         (char=? wanted got)))))

  ;; range := raw-char "-" raw-char
  ;; Yields a predicate accepting every character between the two parsed
  ;; endpoints, both inclusive (compared with char<=?).
  (define range
    (p:parse-m (p:<- lo raw-char)
               (p:lit "-")
               (p:<- hi raw-char)
               (p:pure (lambda (c)
                         (and (char<=? lo c)
                              (char<=? c hi))))))

  ;; set-item := range | char-rx
  ;; `range` is listed first so that text such as "a-z" is offered to the
  ;; range parser before a single character is tried.
  ;; NOTE(review): relies on p:alt trying its alternatives in order and
  ;; falling back when the first fails -- confirm in (parser-combinators).
  (define set-item (p:alt range char-rx))

  ;; (or-preds preds) -> predicate
  ;; The returned predicate applies each member of PREDS to its argument in
  ;; list order and answers with the first true result; it answers #f when
  ;; every predicate fails or PREDS is empty.
  (define (or-preds preds)
    (lambda (c)
      (exists (lambda (pred) (pred c))
              preds)))

  ;; set-items := set-item+
  ;; The predicates produced by the individual set-items are merged into a
  ;; single predicate with or-preds, so a character is accepted when any item
  ;; accepts it.
  ;; NOTE(review): assumes p:many+ yields the results as a list, which is what
  ;; or-preds expects -- confirm.
  (define set-items
    (p:lift or-preds (p:many+ set-item)))

  ;; negative-set := "[^" set-items "]"
  ;; The predicate built by set-items is passed through `negate`, so the
  ;; resulting predicate rejects exactly what the listed items accept.
  (define negative-set
    (bracket (p:lit "[^")
             (p:lit "]")
             (p:lift negate set-items)))

  ;; positive-set := "[" set-items "]"
  ;; Uses the predicate built by set-items as is.
  (define positive-set
    (bracket (p:lit "[")
             (p:lit "]")
             set-items))

  ;; set := positive-set | negative-set
  ;; Both alternatives begin with "[".  For input starting "[^" the positive
  ;; form cannot get past the "^" (raw-char treats it as a metacharacter), so
  ;; the negative form has to be reached through the second alternative.
  ;; NOTE(review): this relies on p:alt backtracking after positive-set has
  ;; already consumed "[" -- confirm in (parser-combinators).
  (define set (p:alt positive-set negative-set))

  ;; eos := "$"
  ;; FIXME: ???
  ;; Only recognises the literal "$"; it produces no instruction and is not
  ;; referenced by elementary-rx, so "$" is currently not part of the accepted
  ;; grammar.
  (define eos (p:lit "$"))

  ;; any := "."
  ;; Yields a predicate that answers #t for whatever it is given.
  (define any
    (let ((accept-all (lambda (ch) #t)))
      (p:>> (p:lit ".")
            (p:pure accept-all))))

  ;; (combine rs) -> instruction list
  ;; Chains a non-empty list of instruction lists left to right with `seq`.
  (define (combine rs)
    (let loop ((acc (car rs))
               (rest (cdr rs)))
      (if (null? rest)
          acc
          (loop (seq acc (car rest))
                (cdr rest)))))

  ;;-----------------------------------------------------------
  ;; Higher level combinators, these build a symbolic rx

  ;; The grammar is mutually recursive (rx -> simple-rx -> ... -> group -> rx),
  ;; which would send the combinators into an infinite loop whilst they are
  ;; being built (not during parsing).  So `rx` starts life as a placeholder
  ;; that can be referenced while the other parsers are constructed, and
  ;; hotpatch-rx swaps in the real parser on first use.
  ;; Until it has been patched, the placeholder body is an error parser
  ;; carrying the message "rx not bound".
  ;; NOTE(review): indirect-lambda / set-lambda! come from (utils); the exact
  ;; indirection mechanism is not visible here.
  (define rx
    (indirect-lambda ()
     (p:error-m "rx not bound")))

  ;; group := "(" rx ")"
  ;; Refers to `rx` while it is still the hot-patchable placeholder; the real
  ;; parser is only installed later by hotpatch-rx.
  (define group
    (bracket (p:lit "(")
             (p:lit ")")
             rx))

  ;; elementary-rx := any | char-rx | set | group
  ;; any, char-rx and set each yield a char predicate, which is wrapped here
  ;; into a one-instruction list ((char fn)).  The result of `group` is used
  ;; unchanged.
  ;; `any` is listed before char-rx: "." is not in raw-char's metacharacter
  ;; list, so char-rx on its own would accept it as a literal.
  ;; NOTE(review): assumes p:one-of tries its alternatives in order -- confirm.
  ;; FIXME: eos is still not wired in (group is).
  (define elementary-rx
    (p:alt (p:lift (lambda (fn)
                     (list (char-instr fn)))
                   (p:one-of any char-rx set))
           group))

  ;; plus-rx := elementary-rx "+"
  ;; The instruction list parsed for the element is passed to `plus`
  ;; (one or more repetitions).
  (define plus-rx
    (p:lift plus (p:<* elementary-rx (p:lit "+"))))

  ;; star-rx := elementary-rx "*"
  ;; The instruction list parsed for the element is passed to `star`
  ;; (zero or more repetitions).
  (define star-rx
    (p:lift star (p:<* elementary-rx (p:lit "*"))))

  ;; basic-rx := star-rx | plus-rx | elementary-rx
  ;; The suffixed forms are listed before the bare element.
  ;; NOTE(review): presumably p:one-of tries alternatives in order and
  ;; backtracks, so an element not followed by "*" or "+" is re-parsed by the
  ;; last alternative -- confirm in (parser-combinators).
  (define basic-rx
    (p:one-of star-rx plus-rx elementary-rx))

  ;; simple-rx := basic-rx+
  ;; The instruction lists of the parsed pieces are concatenated, in order,
  ;; by `combine`.
  (define simple-rx
    (p:lift combine (p:many+ basic-rx)))

  ;; rx := simple-rx ("|" simple-rx)*
  ;; Calling hotpatch-rx installs the real `rx` parser through set-lambda!.
  ;; A private flag makes every call after the first a no-op.  The parsed
  ;; alternatives are folded left to right with `alt`.
  (define hotpatch-rx
    (let ((installed? #f))
      (define (alternatives first others)
        (fold-left alt first others))

      (lambda ()
        (unless installed?
          (set! installed? #t)
          (set-lambda! rx
                       (p:lift2 alternatives
                                simple-rx
                                (p:many* (p:>> (p:lit "|") simple-rx))))))))

  ;;-----------------------------------------------------------------------
  ;; The top level routine: parses the regex string STR and compiles it into
  ;; a matcher procedure, or returns #f if the parse failed.
  ;; regex :: string -> (matcher <string>)
  ;; FIXME: it's tempting to return a function that raises if there's a parse error.
  ;; (The last closing paren below also ends the enclosing library form.)
  (define (regex str)
    (hotpatch-rx)
    (call-with-values
      (lambda () (p:parse rx str))
      (lambda (v st)
        (and (p:success? st)
             (compile-rx v))))))
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30			`(library`
			`(regex)`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(export regex)`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30			`(import (chezscheme)`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30			`(fmt fmt)`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(loops)`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(prefix (parser-combinators) p:)`
			`(srfi s8 receive)`
[functional-tests] move swap! to (utils) 2017-08-23 15:19:36 +05:30			`(matchable)`
			`(utils))`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30
			`;; Simple regex library, because it's friday and I'm bored.`
			`;; Playing with the ideas in: https://swtch.com/~rsc/regexp/regexp2.html`
			`;; which reminded me of reading through the source code to Sam in '93.`

			`;; Rather than parsing a string we'll use expressions.`
			`;; (lit <string>)`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30			`;; (seq rx1 rx2)`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30			`;; (alt rx1 rx2)`
			`;; (opt rx)`
			`;; (star rx)`
			`;; (plus rx)`
			`;;`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`;; The expressions get compiled into a vector of vm instructions.`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;; (char pred) ; where fn :: char -> bool`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30			`;; (match)`
			`;; (jmp x)`
			`;; (split x y)`

[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(define (append-instr code . i) (append code i))`
			(define (label-instr l) `(label ,l))
			(define (jmp-instr l) `(jmp ,l))
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			(define (char-instr fn) `(char ,fn))
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			(define (split-instr l1 l2) `(split ,l1 ,l2))
			`(define (match-instr) '(match))`
			`(define (match-instr? instr) (equal? '(match) instr))`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30
			`(define (label-code label code)`
			`(cons (label-instr label) code))`

			`;; Compiles to a list of labelled instructions that can later be flattened`
			`;; into a linear sequence.`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(define (lit str)`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(map (lambda (c1)`
			`(char-instr`
			`(lambda (c2)`
			`(char=? c1 c2))))`
			`(string->list str)))`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30			`(define (seq rx1 rx2)`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(append rx1 rx2))`

			`(define (alt rx1 rx2)`
			`(let ((label1 (gensym))`
			`(label2 (gensym))`
			`(tail (gensym)))`
			`(let ((c1 (label-code label1`
			`(append-instr rx1 (jmp-instr tail))))`
			`(c2 (label-code label2 rx2)))`
			`(cons (split-instr label1 label2)`
			`(append-instr (append c1 c2) (label-instr tail))))))`

			`(define (opt rx)`
			`(let ((head (gensym))`
			`(tail (gensym)))`
			`(cons (split-instr head tail)`
			`(label-code head`
			`(append-instr rx (label-instr tail))))))`

			`(define (star rx)`
			`(let ((head (gensym))`
[functional-tests/regex] fix bug in the star combinator 2017-08-18 22:31:31 +05:30			`(body (gensym))`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(tail (gensym)))`
[functional-tests/regex] fix bug in the star combinator 2017-08-18 22:31:31 +05:30			`(label-code head`
			`(cons (split-instr body tail)`
			`(label-code body`
			`(append-instr rx`
			`(jmp-instr head)`
			`(label-instr tail)))))))`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30
			`(define (plus rx)`
			`(let ((head (gensym))`
			`(tail (gensym)))`
			`(label-code head`
			`(append-instr rx`
			`(split-instr head tail)`
			`(label-instr tail)))))`

			`(define (label-locations code)`
			`(let ((locs (make-eq-hashtable)))`
			`(let loop ((pc 0)`
			`(code code))`
			`(if (null? code)`
			`locs`
			`(match (car code)`
			`(('label l)`
			`(begin`
			`(hashtable-set! locs l pc)`
			`(loop pc (cdr code))))`
			`(instr`
			`(loop (+ 1 pc) (cdr code))))))))`

			`(define (remove-labels code locs)`
			`(let loop ((pc 0)`
			`(code code)`
			`(acc '()))`
			`(if (null? code)`
			`(reverse acc)`
			`(match (car code)`
			`(('label l)`
			`(loop pc (cdr code) acc))`

			`(('jmp l)`
			`(loop (+ 1 pc) (cdr code)`
			(cons `(jmp ,(hashtable-ref locs l #f)) acc)))

			`(('split l1 l2)`
			`(loop (+ 1 pc) (cdr code)`
			(cons `(split ,(hashtable-ref locs l1 #f)
			`,(hashtable-ref locs l2 #f))`
			`acc)))`

			`(instr (loop (+ 1 pc) (cdr code) (cons instr acc)))))))`

			`(define (optimise-jumps! code)`
[functional-tests/regex] tidying 2017-08-20 19:06:02 +05:30			`(define (single-pass)`
			`(let ((changed #f))`
			`(upto (n (vector-length code))`
			`(match (vector-ref code n)`
			`(('jmp l)`
			`(when (match-instr? (vector-ref code l))`
			`(set! changed #t)`
			`(vector-set! code n (match-instr))))`

			`(('split l1 l2)`
			`(when (or (match-instr? (vector-ref code l1))`
			`(match-instr? (vector-ref code l2)))`
			`(set! changed #t)`
			`(vector-set! code n (match-instr))))`

			`(_ _)))`
			`changed))`

			`(let loop ()`
			`(when (single-pass)`
			`(loop)))`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`code)`

[functional-tests/regex] tidying 2017-08-20 19:06:02 +05:30			`(define (compile-to-symbols rx)`
[functional-tests] more work on the regex engine 2017-08-18 17:51:17 +05:30			`(let ((rx (append-instr rx (match-instr))))`
			`(optimise-jumps!`
			`(list->vector`
			`(remove-labels rx (label-locations rx))))))`
[functional-tests] start working on a little regex matcher 2017-08-18 16:57:17 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`;; A 'thread' consists of an index into the instructions. A 'yarn holds the`
			`;; current threads. Note there cannot be more threads than instructions, so`
			`;; a yarn is represented as a vector the same length as the instructions.`
			`;; Threads are run in lock step, all taking the same input.`
			`(define-record-type yarn`
			`(fields (mutable size)`
			`(mutable stack)`
			`(mutable seen)))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(define (mk-yarn count)`
			`(make-yarn 0 (make-vector count) (make-vector count #f)))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(define (clear-yarn! y)`
			`(yarn-size-set! y 0)`
			`(vector-fill! (yarn-seen y) #f))`
[functional-tests/regex] Restructure the matcher to try and reduce memory 2017-08-18 23:02:05 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(define (add-thread! y i)`
			`(unless (vector-ref (yarn-seen y) i)`
			`(vector-set! (yarn-seen y) i #t)`
			`(vector-set! (yarn-stack y) (yarn-size y) i)`
			`(yarn-size-set! y (+ 1 (yarn-size y)))))`
[functional-tests/regex] stop thread-set from allocating We now allocate 0 bytes when matching. But it makes practically no difference to the execution time. 2017-08-19 01:54:38 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(define (pop-thread! y)`
			`(if (zero? (yarn-size y))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30			`#f`
[functional-tests/regex] stop thread-set from allocating We now allocate 0 bytes when matching. But it makes practically no difference to the execution time. 2017-08-19 01:54:38 +05:30			`(begin`
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(yarn-size-set! y (- (yarn-size y) 1))`
			`(vector-ref (yarn-stack y) (yarn-size y)))))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30
[functional-tests/regex] rename thread-set to yarn 2017-08-19 13:34:48 +05:30			`(define (no-threads? y)`
			`(zero? (yarn-size y)))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;; FIXME: hack`
			`(define end-of-string #\x0)`

[functional-tests/regex] more optimisation. I think we're going to have to lose the call/cc; something is allocating a lot of memory. 2017-08-18 23:58:07 +05:30			`(define (compile-rx rx)`
[functional-tests/regex] tidying 2017-08-20 19:06:02 +05:30			`(let* ((sym-code (compile-to-symbols rx))`
			`(code-len (vector-length sym-code))`
			`(threads (mk-yarn code-len))`
			`(next-threads (mk-yarn code-len))`
			`(code #f))`

			`(define (compile-instr instr)`
			`(match instr`
			`(('match)`
			`(lambda (in-c pc) 'match))`

[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(('char fn)`
[functional-tests/regex] tidying 2017-08-20 19:06:02 +05:30			`(lambda (in-c pc)`
			`;; use eq? because in-c isn't always a char`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(when (fn in-c)`
[functional-tests/regex] tidying 2017-08-20 19:06:02 +05:30			`(add-thread! next-threads (+ 1 pc)))))`

			`(('jmp l)`
			`(lambda (in-c pc)`
			`(add-thread! threads l)))`

			`(('split l1 l2)`
			`(lambda (in-c pc)`
			`(add-thread! threads l1)`
			`(add-thread! threads l2)))))`

			`(define (step in-c)`
			`(let loop ((pc (pop-thread! threads)))`
			`(and pc`
			`(if (eq? 'match ((vector-ref code pc) in-c pc))`
			`'match`
			`(loop (pop-thread! threads))))))`

			`;(fmt #t (dsp "running ") (pretty code) nl)`

			`;; compile to closures to avoid calling match in the loop.`
			`(upto (n code-len)`
			`(set! code (vector-map compile-instr sym-code)))`

			`(lambda (txt)`
			`(add-thread! threads 0)`
			`(let ((txt-len (string-length txt)))`
			`(let c-loop ((c-index 0))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(if (< c-index txt-len)`
			`;; FIXME: make step return a bool`
			`(if (eq? 'match (step (string-ref txt c-index)))`
			`#t`
			`(if (no-threads? next-threads)`
			`#f`
			`(begin`
[functional-tests] move swap! to (utils) 2017-08-23 15:19:36 +05:30			`(swap! threads next-threads)`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(clear-yarn! next-threads)`
			`(c-loop (+ 1 c-index)))))`
			`(eq? 'match (step end-of-string))))))))`
[functional-tests] regex matcher starts to work. Very slow implementation. 2017-08-18 20:32:43 +05:30
[functional-tests/regex] add comment for regex grammar. 2017-08-20 18:05:40 +05:30			`;;;--------------------------------------------------------`
			`;;; Parser`

[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;; FIXME: ^ and ? aren't in the grammar, and eos/$ isn't wired up`

			`(define raw-char`
			`(let ((meta-chars (string->list "\\^$*+?[]()\|")))`
			`(define (not-meta c)`
			`(not (member c meta-chars)))`

			`(p:alt (p:parse-m (p:<- c (p:accept-char not-meta))`
			`(p:pure c))`
			`(p:>> (p:lit "\\")`
			`(p:accept-char (lambda (c) #t))))))`

			`(define (bracket before after ma)`
			`(p:>> before (p:<* ma after)))`

			`(define (negate fn)`
			`(lambda (c)`
			`(not (fn c))))`

			`;;-----------------------------------------------------------`
			`;; Low level char combinators. These build char predicates.`

			`;; char-rx := any non metacharacter \| "\" metacharacter`
			`;; builds a predicate that accepts the char`
			`(define char-rx`
			`(p:parse-m (p:<- c1 raw-char)`
			`(p:pure (lambda (c2)`
			`(char=? c1 c2)))))`

			`;; range := char-rx "-" char-rx`
			`(define range`
			`(p:parse-m (p:<- c1 raw-char)`
			`(p:lit "-")`
			`(p:<- c2 raw-char)`
			`(p:pure (lambda (c)`
			`(char<=? c1 c c2)))))`

			`;; set-items := range \| char-rx`
			`(define set-item (p:alt range char-rx))`

			`(define (or-preds preds)`
			`(lambda (c)`
			`(let loop ((preds preds))`
			`(if (null? preds)`
			`#f`
			`(or ((car preds) c)`
			`(loop (cdr preds)))))))`

			`;; set-items := set-item+`
			`(define set-items`
			`(p:lift or-preds (p:many+ set-item)))`

			`;; negative-set := "[^" set-items "]"`
			`(define negative-set`
			`(bracket (p:lit "[^")`
			`(p:lit "]")`
			`(p:lift negate set-items)))`

			`;; positive-set := "[" set-items "]"`
			`(define positive-set`
			`(bracket (p:lit "[")`
			`(p:lit "]")`
			`set-items))`

			`;; set := positive-set \| negative-set`
			`(define set (p:alt positive-set negative-set))`

			`;; eos := "$"`
			`;; FIXME: ???`
			`(define eos (p:lit "$"))`

			`;; any := "."`
			`(define any (p:>> (p:lit ".") (p:pure (lambda (_) #t))))`

[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define (combine rs)`
			`(fold-left seq (car rs) (cdr rs)))`

[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;;-----------------------------------------------------------`
			`;; Higher level combinators, these build a symbolic rx`

[functional-tests/regex] reorder definition of rx 2017-08-29 17:54:11 +05:30			`;; There's mutual recursion here which would send the combinators into an`
			`;; infinite loop whilst they are being built (not during parsing). So we hot`
			`;; patch rx, making it available for construction, and then redefine it on`
			`;; first use.`
			`(define rx`
			`(indirect-lambda ()`
			`(p:error-m "rx not bound")))`

[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;; group := "(" rx ")"`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define group`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`(bracket (p:lit "(")`
			`(p:lit ")")`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`rx))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
			`;; elementary-rx := group \| any \| eos \| char-rx \| set`
			`;; FIXME: put eos and group back in`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define elementary-rx`
			`(p:alt (p:lift (lambda (fn)`
			`(list (char-instr fn)))`
			`(p:one-of any char-rx set))`
			`group))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
			`;; plus-rx := elementary-rx "+"`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define plus-rx`
			`(p:lift plus (p:<* elementary-rx (p:lit "+"))))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
			`;; star-rx := elementary-rx "*"`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define star-rx`
			`(p:lift star (p:<* elementary-rx (p:lit "*"))))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
			`;; basic-rx := star-rx \| plus-rx \| elementary-rx`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define basic-rx`
			`(p:one-of star-rx plus-rx elementary-rx))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
			`;; simple-rx := basic-rx+`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(define simple-rx`
			`(p:lift combine (p:many+ basic-rx)))`
[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30
[functional-tests/regex] use indirect-lambda to hotpatch rx 2017-08-29 13:57:28 +05:30			`;; rx := simple-rx ("\|" simple-rx)*`
			`(define hotpatch-rx`
			`(let ((patched #f))`
			`(lambda ()`
			`(unless patched`
			`(set! patched #t)`
			`(set-lambda! rx`
			`(p:lift2 (lambda (r rs)`
			`(fold-left alt r rs))`
			`simple-rx`
			`(p:many* (p:>> (p:lit "\|") simple-rx))))))))`

[functional-tests/regex] parser mostly working. Just need to stop grouping '(' ')' from looping forever. 2017-08-22 21:58:13 +05:30			`;;-----------------------------------------------------------------------`
			`;; The top level routine, parses the regex string and compiles it into a`
			`;; matcher, or returns false if the parse failed.`
			`;; regex :: string -> (matcher <string>)`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`;; FIXME: it's tempting to return a function that raises if there's a parse error.`
[functional-tests/regex] use indirect-lambda to hotpatch rx 2017-08-29 13:57:28 +05:30			`(define (regex str)`
			`(hotpatch-rx)`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30			`(receive (v st) (p:parse rx str)`
			`(if (p:success? st)`
			`(compile-rx v)`
[functional-tests/regex] use indirect-lambda to hotpatch rx 2017-08-29 13:57:28 +05:30			`#f))))`
[functional-tests/regex] Get groups working Hacked a hotpatch soln. 2017-08-28 22:08:49 +05:30