X-Git-Url: https://code.consxy.com/gitweb/gitweb.cgi?p=clnl;a=blobdiff_plain;f=src%2Fmain%2Flex.lisp;h=0adeb7d2b4323faf794ce688f53ce08ab142fa22;hp=b1da84848d7792a5805f052e708d3efede778aa0;hb=89cb3482de1fefc2d8e5f67e1e04a0790e8cd243;hpb=b398914bb75f7d9ac9b1d19076e98338281900b0 diff --git a/src/main/lex.lisp b/src/main/lex.lisp index b1da848..0adeb7d 100644 --- a/src/main/lex.lisp +++ b/src/main/lex.lisp @@ -1,8 +1,4 @@ -(defpackage #:cl-nl.lexer - (:use :common-lisp) - (:export :lex)) - -(in-package #:cl-nl.lexer) +(in-package #:clnl-lexer) ; I played around with using #'read for netlogo code, which would have been neat. ; However, it provides too many instances where people could inject CL code @@ -17,48 +13,62 @@ (defmacro deflex (state match &optional func) (let ((scanner (gensym))) - `(let - ((,scanner (when (stringp ,match) (cl-ppcre:create-scanner ,match)))) - (pushnew - (list - (lambda (state text) - (and - (eql ,state state) - (or - (and (symbolp text) (eql text ,match)) - (and ,scanner - (stringp text) - (multiple-value-bind (start end) (cl-ppcre:scan ,scanner text) - (and start end (= 0 start) (/= 0 end))))))) - (lambda (text) (second (multiple-value-list (cl-ppcre:scan ,scanner text)))) - ,(or func #'as-symbol)) - *lexes*)))) + `(let + ((,scanner (when (stringp ,match) (cl-ppcre:create-scanner ,match)))) + (pushnew + (list + (lambda (state text) + (and + (eql ,state state) + (or + (and (symbolp text) (eql text ,match)) + (and + ,scanner + (stringp text) + (multiple-value-bind (start end) (cl-ppcre:scan ,scanner text) + (and start end (= 0 start) (/= 0 end))))))) + (lambda (text) (second (multiple-value-list (cl-ppcre:scan ,scanner text)))) + ,(or func #'as-symbol)) + *lexes*)))) (defun lex (text) + "LEX TEXT => AST + +ARGUMENTS AND VALUES: + + TEXT: Some NetLogo code + AST: An ambigious AST that can later be parsed + +DESCRIPTION: + + LEX lexes NetLogo code. + + LEX checks for some things, in as much as it can without knowing anything + about some of the backgrounds of NetLogo. However, it does the first pass + with as much as it can." (if (string= "" text) - (let - ((lex (find-if (lambda (f) (funcall f *state* :eof)) *lexes* :key #'car))) - (when lex (list (funcall (third lex) :eof)))) - (let - ((lex (find-if (lambda (f) (funcall f *state* text)) *lexes* :key #'car))) - (when (not lex) (error "Can't lex this: ~S" text)) - (let - ((val (funcall (third lex) (subseq text 0 (funcall (cadr lex) text))))) - (if val - (cons val (lex (subseq text (funcall (cadr lex) text)))) - (lex (subseq text (funcall (cadr lex) text)))))))) + (let + ((lex (find-if (lambda (f) (funcall f *state* :eof)) *lexes* :from-end t :key #'car))) + (when lex (list (funcall (third lex) :eof)))) + (let + ((lex (find-if (lambda (f) (funcall f *state* text)) *lexes* :from-end t :key #'car))) + (when (not lex) (error "Can't lex this: ~S" text)) + (let + ((val (funcall (third lex) (subseq text 0 (funcall (cadr lex) text))))) + (if val + (cons val (lex (subseq text (funcall (cadr lex) text)))) + (lex (subseq text (funcall (cadr lex) text)))))))) (defun set-state (new-state) (setf *state* new-state)) ; This part is the actual netlogo spec -(defvar *string-text* "(\\\"|\\r|\\n|\\t|\\\\|\\[^\"]|[^\r\n\"\\])*") +(defvar *string-text* "(\\\\\"|\\\\n|\\\\r|\\\\t|\\\\|[^\\r\\n\\\"])") (defvar *nonnewline_white_space_char* "[ \\t\\b\\012]") (defvar *letter* "\\w") (defvar *digit* "\\d") -;(defparameter *identifier-char* "[\\w\\d_\\.?=\*!<>:#\+/%\$\^\'&-]") -(defvar *identifier-char* "[\\w\\d]") +(defvar *identifier-char* "[\\w\\d_.?=\*!<>:#\+/%$\^'&-]") ;(defvar *extension-literal-depth* 0) ;(defstruct extension-literal text) @@ -85,18 +95,27 @@ (deflex :initial "[,\\{\\}\\[\\]\\(\\)]" #'as-symbol) (deflex :initial *nonnewline_white_space_char* (constantly nil)) (deflex :initial "\\n|\\r" (constantly nil)) -;(deflex :initial ";.*[\n\r]?" nil) -;(deflex :initial (format nil "-?\.?[0-9]~A" *identifier-char*) -; (lambda (text) -; (let -; ((num? -; (let -; ((*readtable* (copy-readtable nil)) -; (*read-eval* nil)) -; (read-from-string text)))) -; (if (numberp num?) num? (error "Invalid number"))))) +(deflex :initial ";.*[\n\r]?" (constantly nil)) +(deflex :initial (format nil "-?\\.?[0-9]~A*" *identifier-char*) + (lambda (text) + (let + ((num? + (let + ((*readtable* (copy-readtable nil)) + (*read-eval* nil)) + (read-from-string (format nil "~Ad0" text))))) + (if (numberp num?) num? (error "Invalid number"))))) (deflex :initial (format nil "~A*" *identifier-char*) #'as-symbol) -;(deflex :initial (format nil "\"~A*\"" *string-text*)) -;(deflex :initial (format nil "\"~A*" *string-text*) (lambda (text) (error "Closing double quote is missing"))) +(deflex :initial (format nil "\"~A*\"" *string-text*) + ; While this shouldn't let harmful strings in, + ; one can't be too careful + (lambda (text) + (let + ((*readtable* (copy-readtable nil)) + (*read-eval* nil)) + (read-from-string text)))) + +(deflex :initial (format nil "\"~A*" *string-text*) + (lambda (text) (declare (ignore text)) (error "Closing double quote is missing"))) ;(deflex :initial "." (lambda (text) (error "Non standard character is not allowed")))