1 (in-package #:clnl-lexer)
3 ; I played around with using #'read for netlogo code, which would have been neat.
4 ; However, it provides too many instances where people could inject CL code
5 ; and I wanted to prevent that. In the end, writing my own lexer became kind of fun.
7 (defvar *state* :initial)
8 (defvar *states* :extension-literal)
9 (defparameter *lexes* nil)
11 (defun as-symbol (text) (intern (string-upcase text) :keyword))
13 (defmacro deflex (state match &optional func)
17 ((,scanner (when (stringp ,match) (cl-ppcre:create-scanner ,match))))
24 (and (symbolp text) (eql text ,match))
28 (multiple-value-bind (start end) (cl-ppcre:scan ,scanner text)
29 (and start end (= 0 start) (/= 0 end)))))))
30 (lambda (text) (second (multiple-value-list (cl-ppcre:scan ,scanner text))))
31 ,(or func #'as-symbol))
39 TEXT: Some NetLogo code
40 AST: An ambigious AST that can later be parsed
44 LEX lexes NetLogo code.
46 LEX checks for some things, in as much as it can without knowing anything
47 about some of the backgrounds of NetLogo. However, it does the first pass
48 with as much as it can."
51 ((lex (find-if (lambda (f) (funcall f *state* :eof)) *lexes* :from-end t :key #'car)))
52 (when lex (list (funcall (third lex) :eof))))
54 ((lex (find-if (lambda (f) (funcall f *state* text)) *lexes* :from-end t :key #'car)))
55 (when (not lex) (error "Can't lex this: ~S" text))
57 ((val (funcall (third lex) (subseq text 0 (funcall (cadr lex) text)))))
59 (cons val (lex (subseq text (funcall (cadr lex) text))))
60 (lex (subseq text (funcall (cadr lex) text))))))))
62 (defun set-state (new-state)
63 (setf *state* new-state))
65 ; This part is the actual netlogo spec
67 (defvar *string-text* "(\\\\\"|\\\\n|\\\\r|\\\\t|\\\\|[^\\r\\n\\\"])")
68 (defvar *nonnewline_white_space_char* "[ \\t\\b\\012]")
69 (defvar *letter* "\\w")
70 (defvar *digit* "\\d")
71 (defvar *identifier-char* "[\\w\\d_.?=\*!<>:#\+/%$\^'&-]")
73 ;(defvar *extension-literal-depth* 0)
74 ;(defstruct extension-literal text)
76 ;(deflex :initial "{{"
78 ; (set-state :extension-literal)
82 ;(deflex :extension-literal "}}"
84 ; (if (= 0 *extension-literal-depth*)
85 ; (progn (set-state :initial) text)
86 ; (progn (decf *extension-literal-depth*) (as-symbol text)))))
88 ;(deflex :extension-literal "{{"
89 ; (lambda (text) (incf *extension-literal-depth*) text))
91 ;(deflex :extension-literal "\n|\r" (lambda () (error "End of line reached unexpectedly")))
92 ;(deflex :extension-literal :eof (lambda () (error "end of file reached unexpectedly")))
93 ;(deflex :extension-literal ".")
95 (deflex :initial "[,\\{\\}\\[\\]\\(\\)]" #'as-symbol)
96 (deflex :initial *nonnewline_white_space_char* (constantly nil))
97 (deflex :initial "\\n|\\r" (constantly nil))
98 (deflex :initial ";.*[\n\r]?" (constantly nil))
99 (deflex :initial (format nil "-?\\.?[0-9]~A*" *identifier-char*)
104 ((*readtable* (copy-readtable nil))
106 (read-from-string (format nil "~Ad0" text)))))
107 (if (numberp num?) num? (error "Invalid number")))))
109 (deflex :initial (format nil "~A*" *identifier-char*) #'as-symbol)
110 (deflex :initial (format nil "\"~A*\"" *string-text*) #'identity)
111 (deflex :initial (format nil "\"~A*" *string-text*)
112 (lambda (text) (declare (ignore text)) (error "Closing double quote is missing")))
113 ;(deflex :initial "." (lambda (text) (error "Non standard character is not allowed")))