From: Frank Duncan Date: Sat, 30 May 2015 17:45:43 +0000 (-0500) Subject: Add very simple first pass at lexer X-Git-Tag: v0.0.0~21 X-Git-Url: https://code.consxy.com/gitweb/gitweb.cgi?a=commitdiff_plain;h=b398914bb75f7d9ac9b1d19076e98338281900b0;p=clnl Add very simple first pass at lexer --- diff --git a/src/main/cl-nl.asd b/src/main/cl-nl.asd index f6ca9e5..74b5393 100644 --- a/src/main/cl-nl.asd +++ b/src/main/cl-nl.asd @@ -5,4 +5,6 @@ :author "Frank Duncan (frank@kank.com)" :serial t :components ((:file "package") - (:file "main"))) + (:file "lex") + (:file "main")) + :depends-on (:cl-ppcre)) diff --git a/src/main/lex.lisp b/src/main/lex.lisp new file mode 100644 index 0000000..b1da848 --- /dev/null +++ b/src/main/lex.lisp @@ -0,0 +1,102 @@ +(defpackage #:cl-nl.lexer + (:use :common-lisp) + (:export :lex)) + +(in-package #:cl-nl.lexer) + +; I played around with using #'read for netlogo code, which would have been neat. +; However, it provides too many instances where people could inject CL code +; and I wanted to prevent that. In the end, writing my own lexer became kind of fun. + +(defvar *state* :initial) +(defvar *states* :extension-literal) +(defparameter *lexes* nil) + +(defun as-symbol (text) (intern (string-upcase text) :keyword)) + +(defmacro deflex (state match &optional func) + (let + ((scanner (gensym))) + `(let + ((,scanner (when (stringp ,match) (cl-ppcre:create-scanner ,match)))) + (pushnew + (list + (lambda (state text) + (and + (eql ,state state) + (or + (and (symbolp text) (eql text ,match)) + (and ,scanner + (stringp text) + (multiple-value-bind (start end) (cl-ppcre:scan ,scanner text) + (and start end (= 0 start) (/= 0 end))))))) + (lambda (text) (second (multiple-value-list (cl-ppcre:scan ,scanner text)))) + ,(or func #'as-symbol)) + *lexes*)))) + +(defun lex (text) + (if (string= "" text) + (let + ((lex (find-if (lambda (f) (funcall f *state* :eof)) *lexes* :key #'car))) + (when lex (list (funcall (third lex) :eof)))) + (let + ((lex (find-if (lambda (f) (funcall f *state* text)) *lexes* :key #'car))) + (when (not lex) (error "Can't lex this: ~S" text)) + (let + ((val (funcall (third lex) (subseq text 0 (funcall (cadr lex) text))))) + (if val + (cons val (lex (subseq text (funcall (cadr lex) text)))) + (lex (subseq text (funcall (cadr lex) text)))))))) + +(defun set-state (new-state) + (setf *state* new-state)) + +; This part is the actual netlogo spec + +(defvar *string-text* "(\\\"|\\r|\\n|\\t|\\\\|\\[^\"]|[^\r\n\"\\])*") +(defvar *nonnewline_white_space_char* "[ \\t\\b\\012]") +(defvar *letter* "\\w") +(defvar *digit* "\\d") +;(defparameter *identifier-char* "[\\w\\d_\\.?=\*!<>:#\+/%\$\^\'&-]") +(defvar *identifier-char* "[\\w\\d]") + +;(defvar *extension-literal-depth* 0) +;(defstruct extension-literal text) + +;(deflex :initial "{{" +; (lambda (text) +; (set-state :extension-literal) +; (as-symbol text) +; )) + +;(deflex :extension-literal "}}" +; (lambda (text) +; (if (= 0 *extension-literal-depth*) +; (progn (set-state :initial) text) +; (progn (decf *extension-literal-depth*) (as-symbol text))))) + +;(deflex :extension-literal "{{" +; (lambda (text) (incf *extension-literal-depth*) text)) + +;(deflex :extension-literal "\n|\r" (lambda () (error "End of line reached unexpectedly"))) +;(deflex :extension-literal :eof (lambda () (error "end of file reached unexpectedly"))) +;(deflex :extension-literal ".") + +(deflex :initial "[,\\{\\}\\[\\]\\(\\)]" #'as-symbol) +(deflex :initial *nonnewline_white_space_char* (constantly nil)) +(deflex :initial "\\n|\\r" (constantly nil)) +;(deflex :initial ";.*[\n\r]?" nil) +;(deflex :initial (format nil "-?\.?[0-9]~A" *identifier-char*) +; (lambda (text) +; (let +; ((num? +; (let +; ((*readtable* (copy-readtable nil)) +; (*read-eval* nil)) +; (read-from-string text)))) +; (if (numberp num?) num? (error "Invalid number"))))) + +(deflex :initial (format nil "~A*" *identifier-char*) #'as-symbol) +;(deflex :initial (format nil "\"~A*\"" *string-text*)) +;(deflex :initial (format nil "\"~A*" *string-text*) (lambda (text) (error "Closing double quote is missing"))) +;(deflex :initial "." (lambda (text) (error "Non standard character is not allowed"))) diff --git a/src/main/main.lisp b/src/main/main.lisp index e774de3..d9780ba 100644 --- a/src/main/main.lisp +++ b/src/main/main.lisp @@ -1,4 +1,19 @@ (in-package #:cl-nl) +(defun e (ast) ast) + +(defun r (str) + (let + ((ast (cl-nl.lexer:lex str))) + (format t "AST for ~S became ~S~%" str ast) + ast)) + +(defun p (result) result) + (defun run () - (format t "AH HA~%")) + (loop for str = (read-line) + while str + do (p (e (r str)))) + + ;(format t "AH HA~%") + )