(in-package #:candle-aws)

;;; Job system that runs candle jobs on a single EC2 instance, starting the
;;; instance on demand and stopping it again after it has been idle.
;;;
;;; *AWS-STATE* takes one of these values (always read/written while holding
;;; *AWS-MUTEX*):
;;;   :initial             - no job processed yet; shutdown thread not started
;;;   :down                - the box is believed to be stopped
;;;   :up                  - a job finished since the shutdown thread last looked
;;;   :shutting-down-soon  - no job since the last look; stop on the next pass

(defvar *aws-mutex* (sb-thread:make-mutex))
(defvar *aws-waitq* (sb-thread:make-waitqueue))
(defvar *aws-state* :initial)

;; Configuration.  These are deliberately left unbound here; they must be set
;; before the first job is processed.
(defvar *aws-exec*)               ; program run as "<exec> ec2 <cmd> ..."
(defvar *aws-instance-id*)        ; passed to --instance-ids
(defvar *aws-username*)           ; remote login name for rsync and ssh
(defvar *aws-keyfile*)            ; identity file passed to ssh -i
(defvar *rsync-exec*)             ; rsync program
(defvar *ssh-exec*)               ; ssh program
(defvar *remote-work-dir*)        ; remote directory the project is synced to
(defvar *remote-candle-location*) ; remote command invoked with the "run" argument

(defmethod candle:process-job-in-system ((job-system (eql :aws)) job)
  "Run JOB on the AWS box, starting the box first if it is down.
Returns whatever RUN-JOB returns.  The whole operation holds *AWS-MUTEX*, so
jobs are processed one at a time and never overlap a shutdown."
  (sb-thread:with-mutex (*aws-mutex*)
    ;; Don't start it up until we process the first job
    (when (eql :initial *aws-state*)
      (setf *aws-state* :down)
      (start-shutdown-thread))
    (when (eql :down *aws-state*)
      (start-aws-box))
    ;; The box is running from here on.  Record that even when RUN-JOB exits
    ;; non-locally; otherwise the state would stay :down and the shutdown
    ;; thread would never stop the instance.  UNWIND-PROTECT also passes all
    ;; of RUN-JOB's values through unchanged.
    (unwind-protect
         (run-job job)
      (setf *aws-state* :up)
      (sb-thread:condition-broadcast *aws-waitq*))))

(defun start-shutdown-thread ()
  "Start the background thread that stops the box once it has been idle.
Every pass (30 seconds apart) demotes :up to :shutting-down-soon, and stops
the box if it was already :shutting-down-soon.  While the state is :down the
thread blocks on *AWS-WAITQ* until a job broadcasts.  Returns the thread."
  (log:info "Starting AWS shutdown thread")
  (sb-thread:make-thread
   (lambda ()
     (loop
       (sb-thread:with-mutex (*aws-mutex*)
         (when (eql :down *aws-state*)
           (sb-thread:condition-wait *aws-waitq* *aws-mutex*))
         (when (eql :shutting-down-soon *aws-state*)
           ;; A failed stop must not kill this thread, or nothing would ever
           ;; stop the box again.  Leave the state alone so the next pass
           ;; retries.
           (handler-case
               (progn
                 (stop-aws-box)
                 (setf *aws-state* :down))
             (error (e)
               (log:error "Failed to stop AWS box, will retry: ~A" e))))
         (when (eql :up *aws-state*)
           (setf *aws-state* :shutting-down-soon)))
       (sleep 30)))
   :name "AWS Shutdown Thread"))

(defmethod candle:shutdown-system ((job-system (eql :aws)))
  "Stop the AWS box before exiting."
  (log:info "Shutting down AWS box for exit")
  ;; If there's a job going, we need to wait for it to finish.  The stop runs
  ;; while the mutex is held so that no new job can start in between.
  (sb-thread:with-mutex (*aws-mutex*)
    (prog1 (stop-aws-box)
      ;; Make a later job restart the box.  :initial is left alone so that the
      ;; first job still starts the shutdown thread.
      (unless (eql :initial *aws-state*)
        (setf *aws-state* :down)))))

(defun aws-command (cmd &rest args)
  "Run `*AWS-EXEC* ec2 CMD ARGS...` and return its standard output as a string.
The command's standard error is sent to *ERROR-OUTPUT*."
  (with-output-to-string (out)
    (sb-ext:run-program *aws-exec*
                        (append (list "ec2" cmd) args)
                        :output out
                        :error *error-output*)))

(defun describe-property (property)
  "Query PROPERTY of the configured instance and parse the answer with the
Lisp reader.  PROPERTY is a path below Reservations[0].Instances[0], e.g.
\"State.Name\"."
  ;; NOTE(review): presumably the CLI prints JSON, so a quoted value reads as
  ;; a Lisp string -- confirm the CLI's configured output format.
  (let ((output (aws-command "describe-instances"
                             "--instance-ids" *aws-instance-id*
                             "--query" (format nil "Reservations[0].Instances[0].~A"
                                               property)))
        ;; The text comes from an external program; never let #. evaluate it.
        (*read-eval* nil))
    (read-from-string output)))

(defun get-remote-state ()
  "Return the instance's State.Name as a keyword, e.g. :RUNNING or :STOPPED."
  (intern (string-upcase (describe-property "State.Name")) :keyword))

(defun start-aws-box ()
  "Start the instance and wait for it to report :RUNNING.
Polls every 15 seconds, at most 8 times, then waits another 15 seconds.
Signals an error if the instance is still not running afterwards."
  (aws-command "start-instances" "--instance-ids" *aws-instance-id*)
  (loop :repeat 8
        :until (eql :running (get-remote-state))
        :do (sleep 15))
  ;; Make sure ssh and services are started up
  (sleep 15)
  (when (not (eql :running (get-remote-state)))
    (error "Waited two minutes and still not running...?")))

(defun stop-aws-box ()
  "Stop the instance and wait for it to report :STOPPED.
Polls every 15 seconds, at most 8 times.  Signals an error if the instance
is still not stopped afterwards."
  (aws-command "stop-instances" "--instance-ids" *aws-instance-id*)
  (loop :repeat 8
        :until (eql :stopped (get-remote-state))
        :do (sleep 15))
  (when (not (eql :stopped (get-remote-state)))
    (error "Waited two minutes and still not stopped...?")))

(defun %run-capturing (program args)
  "Run PROGRAM with ARGS and wait for it to finish.
Returns two values: the exit code, and standard output and standard error
combined into one string."
  (let* ((code nil)
         (output (with-output-to-string (stream)
                   (setf code
                         (sb-ext:process-exit-code
                          (sb-ext:run-program program args
                                              :output stream
                                              :error stream
                                              :wait t))))))
    (values code output)))

(defun run-job (job)
  "Sync JOB's project directory to the box and run candle there.
Returns two values: true when everything exited with code zero, and the
captured output.  When the rsync step fails the remote command is not run,
and the second value is rsync's output instead."
  (let* (;; Look the address up once and use it for both rsync and ssh.
         (host (describe-property "PublicIpAddress"))
         ;; Both tools must log in as the same remote user.
         (login (format nil "~A@~A" *aws-username* host)))
    (multiple-value-bind (rsync-code rsync-output)
        (%run-capturing
         *rsync-exec*
         (list "-az" "--delete"
               "-e" (format nil "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~A"
                            *aws-keyfile*)
               (candle:project-dir (candle:job-project job))
               (format nil "~A:~A" login *remote-work-dir*)))
      (if (zerop rsync-code)
          (multiple-value-bind (code out)
              (%run-capturing
               *ssh-exec*
               (list "-o" "StrictHostKeyChecking=no"
                     "-o" "UserKnownHostsFile=/dev/null"
                     "-i" *aws-keyfile*
                     login
                     (format nil "cd ~A ; ~A run"
                             *remote-work-dir*
                             *remote-candle-location*)))
            (values (zerop code) out))
          ;; Running the job against a stale or partial tree would give a
          ;; misleading result, so report the sync failure instead.
          (values nil rsync-output)))))