;;; -*- Mode: Lisp -*- #|CLARITY: Common Lisp Data Alignment Repository Copyright (c) 2006 Samantha Kleinberg All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA contact: Samantha AT Bioinformatics DOT nyu DOT edu 715 Broadway, 10th floor New York, NY 10003|# (in-package "CLARITY") (eval-when (:compile-toplevel :load-toplevel :execute) (require "sql") (require "odbc")) #.(sql:enable-sql-reader-syntax) (defclass clarity-handle () ( ;; GRAPH/DB maintainance. ;; none yet ;; Connection information. (connection :accessor connection :initform nil) ) (:documentation "The CLARITY Handle Class. Class containing specifications for connecting to the database.")) ;;; Protocol. (defgeneric clarity-handle-p (x) (:method ((x clarity-handle)) t) (:method ((x t)) nil)) (defgeneric connect (clarity-handle db-spec &optional connection) (:documentation "Sets up a connection to an the clarity database for the handle. arguments: connection string, optional connection usage: (godb:connect ""data source name/user/password"") notes: ")) (defgeneric insert (clarity-handle filename filedate) (:documentation "Takes a file and adds it to the database arguments: handle, path to file, date of file creation usage: (clarity:insert cl-handle ""Z:/filename.txt"" MO/DAY/YR)" )) (defgeneric ganttify (clarity-handle filename) (:documentation "Takes a file with columns of numerical data and transforms data into strings arguments: clarity-handle filename usage: (clarity:ganttify cl-handle ""Z:/filename.txt)" )) (defgeneric get-children (node-id) (:documentation "Gets left and right child of node in phylogenetic tree arguments: tree-id usage: (clarity:get-children tree-id)" )) (defgeneric get-node-terms (clarity-handle node-id) (:documentation "Gets all terms associated with a tree node. arguments: cl-handle tree-id usage: (clarity:get-node-terms cl-handle tree-id) notes: Terms may be part of conensus sequence or actual data set" )) (defun sqlcon () "Connects to sql using stored connection information." (sql:connect "GODB_internal/root" :database-type :odbc :if-exists :old)) (defvar *cl-handle*) (defvar *current-clarity-handle* nil) (defun make-clarity-handle () "Creates a new instance of the clarity handle class and assigns it to the current-clarity-handle variable" (setf *current-clarity-handle* (make-instance 'clarity-handle))) (defun disconnect-clarity-dbs () (when (member (connection *current-clarity-handle*) (sql:connected-databases)) (sql:disconnect :database (connection *current-clarity-handle*)))) (defmethod connect ((clh clarity-handle) (db-spec string) &optional connection) (format t "~S connecting as ~S~%" clh db-spec) (if connection (setf (connection clh) connection) (setf (connection clh) (sql:connect db-spec :database-type :odbc :if-exists :old)))) (defun number-to-string (number) (format nil "~s" number) ) ;;will have variety of insert methods, for different types of input. ;;for now, only simple, very processed, input is allowed (defmethod insert ((clh clarity-handle) filename filedate) (let ((gantt-chart) (file-id)) ;;insert datafile in db (sql:with-transaction (sql:insert-records :into [|timecourse_data|] :attributes '([|datafile_path|] [|creation_date|]) :values (list filename filedate) :database (connection clh))) (setf file-id (lastid clh :table "timecourse_data")) ;:Create gantt chart of datafile's contents (multiple-value-bind (header gantt-chart) (ganttify clh filename) (store-gantt-charts clh file-id gantt-chart) (store-numerical-data clh file-id filename) ) ;;ask if file should be inserted into tree (when (capi:confirm-yes-or-no "Add new file to tree?") (tree-insert clh file-id) ) ) ) (defmethod insert-with-probes ((clh clarity-handle) filename filedate &key probes-p) (let ((gantt-chart) (file-id)) ;;insert datafile in db (sql:with-transaction (sql:insert-records :into [|timecourse_data|] :attributes '([|datafile_path|] [|creation_date|]) :values (list filename filedate) :database (connection clh))) (setf file-id (lastid clh :table "timecourse_data")) ;:Create gantt chart of datafile's contents (multiple-value-bind (header gantt-chart) (ganttify-with-probes clh filename) (store-gantt-charts clh file-id gantt-chart) (store-numerical-and-probe-data clh file-id filename) ) ;;ask if file should be inserted into tree (when (capi:confirm-yes-or-no "Add new file to tree?") (tree-insert clh file-id) ) )) (defvar numlines) (defmethod ganttify ((clh clarity-handle) f) ;clh = clarity handle (let* ((lines (file-lines f)) (split-lines (loop for line in lines collecting (split-sequence:split-sequence #\Tab line :remove-empty-subseqs t) )) (head (first split-lines)) #|header of form "term_name 0 2 TAB 2 4 TAB 4 8 want '(term name ('(0 2) . . .'(4 8)))|# (header (append (list (first head)) (loop for nums in (rest head) collecting (loop for dig in (split-sequence:split-sequence #\Space nums :remove-empty-subseqs t) collecting (cond ((equal dig nil) nil) (t (coerce (parse-decimal dig) 'double-float))))))) (numerical-lines (loop for (label . nums) in (rest split-lines) collecting (append (list label) (loop for n in nums collecting (cond ((equal n "NIL") nil) (t (coerce (parse-decimal n) 'double-float))))))) (sequences (loop for (label . nums) in numerical-lines collecting (append (list label) (list (loop for num in nums collecting (cond ((equal num nil) #\I) ;;check first so there won't be type errors ((> num 0) #\U) ((< num 0) #\D) ((= num 0) #\N) (t #\I)) into my-string finally (return (concatenate 'string my-string)))))))) (values header sequences) )) (defmethod ganttify-with-probes ((clh clarity-handle) f) ;clh = clarity handle (let* ((lines (file-lines f)) (split-lines (loop for line in lines collecting (split-sequence:split-sequence #\Tab line :remove-empty-subseqs t) )) (head (first split-lines)) (header (remove-if (lambda (x) (equal x nil)) (append (list (first head)) (loop for nums in (rest head) collecting (cond ((equal nums "NIL") nil) (t (loop for dig in (split-sequence:split-sequence #\Space nums :remove-empty-subseqs t) collecting (cond ((equal dig nil) nil) (t (coerce (parse-decimal dig) 'double-float)))))))))) (numerical-lines (loop for (label . nums) in (rest split-lines) collecting (append (list label) (loop for i from 0 to (- (length nums) 1) by 2 collecting (cond ((equal (nth i nums) "NIL") nil) (t (coerce (parse-decimal (nth i nums)) 'double-float))))))) (sequences (loop for (label . nums) in numerical-lines collecting (append (list label) (list (loop for num in nums collecting (cond ((equal num nil) #\I) ;;check first so there won't be type errors ((> num 0) #\U) ((< num 0) #\D) ((= num 0) #\N) (t #\I)) into my-string finally (return (concatenate 'string my-string)) ))))) ) ; paired-lines (values header sequences) )) (defmethod store-gantt-charts ((clh clarity-handle) data_id chart-list) ;;store multiple gantt charts comprising a data set ;;would be better to use this and not have to specify each col individually (let ((window-string "")) (loop for c in chart-list do(store-gantt-chart clh data_id c) ) )) (defvar *head) (defmethod store-numerical-data ((clh clarity-handle) data-id file) (let* ((lines (file-lines file)) (split-lines (loop for line in lines collecting (split-sequence:split-sequence #\Tab line :remove-empty-subseqs t) )) (head (first split-lines)) #|header of form "term_name 0 2 TAB 2 4 TAB 4 8 want '(term name ('(0 2) . . .'(4 8)))|# (header (append (list (first head)) (loop for nums in (rest head) collecting (loop for dig in (split-sequence:split-sequence #\Space nums :remove-empty-subseqs t) collecting (coerce (parse-decimal dig) 'short-float))))) (numerical-lines (loop for (label . nums) in (rest split-lines) collecting (append (list label) (loop for n in nums collecting (cond ((equal n "NIL") 0.0) ;;bad fix! BAD! (t (coerce (parse-decimal n) 'short-float))))))) ) (setf *head header) (loop for (term . rest) in numerical-lines do(loop for i = 1 then (incf i) for window in rest do(sql:with-transaction (sql:insert-records :into [|numerical_data|] :attributes '([|timecourse_data_id|] [|term_name|] [|window_start|] [|window_end|] [|data|]) :values (list data-id term (first (nth i header)) (second (nth i header)) window) :database (connection clh)))) ) ) ) (defmethod store-numerical-and-probe-data ((clh clarity-handle) data-id file) (let* ((lines (file-lines file)) (split-lines (loop for line in lines collecting (split-sequence:split-sequence #\Tab line :remove-empty-subseqs t) )) (head (first split-lines)) #|header of form "term_name 0 2 TAB 2 4 TAB 4 8 want '(term name ('(0 2) . . .'(4 8)))|# (header (remove-if (lambda (x) (equal x nil)) (append (list (first head)) (loop for nums in (rest head) collecting (cond ((equal nums "NIL") nil) (t (loop for dig in (split-sequence:split-sequence #\Space nums :remove-empty-subseqs t) collecting (cond ((equal dig nil) nil) (t (coerce (parse-decimal dig) 'short-float)))))))))) (numerical-lines (loop for (label . nums) in (rest split-lines) collecting (append (list label) (loop for i from 0 to (- (length nums) 1) by 2 collecting (list (cond ((equal (nth i nums) "NIL") 0.0) ;;bad fix! BAD! (t (coerce (parse-decimal (nth i nums)) 'short-float))) (nth (+ i 1) nums) ) ))))) (loop for (term . rest) in numerical-lines do(loop for i = 1 then (incf i) for (window-num probes) in rest do(sql:with-transaction (sql:insert-records :into [|numerical_data|] :attributes '([|timecourse_data_id|] [|term_name|] [|window_start|] [|window_end|] [|data|] [|probe_ids|]) :values (list data-id term (first (nth i header)) (second (nth i header)) window-num probes) :database (connection clh)))) ))) (defun flatten (list) (cond ((null list) list) ((atom list) (list list)) (t (append (flatten (first list)) (flatten (rest list)))))) (defvar store) (defmethod store-gantt-chart ((clh clarity-handle) data_id chart) ;;store a single gantt chart (let ((to-store) (term-name (first chart)) (string (second chart))) (setf to-store (list data_id term-name string)) (setf store to-store) (sql:with-transaction (sql:insert-records :into [|gantt_chart|] :attributes '([|timecourse_data_id|] [|term_name|] [|sequence|]) :values to-store :database (connection clh) ))) ) (defmethod lastid ((clh clarity-handle) &key table) (cond ((equal table "timecourse_data")(first (first (sql:query "SELECT MAX(id) FROM timecourse_data;" :database (connection clh))))) ((equal table "tree") (first (first (sql:query "SELECT MAX(id) FROM tree;" :database (connection clh))))) ((equal table "consensus")(first (first (sql:query "SELECT MAX(id) FROM consensus;" :database (connection clh))))) ((equal table "gantt_chart") (first (first (sql:query "SELECT MAX(id) FROM gantt_chart;" :database (connection clh))))) (t 0);unknown table )) (defun file-lines (path) "Sucks up an entire file from PATH into a list of freshly-allocated strings, returning two values: the list of strings and the number of lines read." (with-open-file (s path) (loop for line = (read-line s nil nil) while line collect line into lines counting t into line-count finally (return (values lines line-count))))) (defmethod get-children (node-id) (let ((children (first (sql:with-transaction (sql:select [node_left] [node_right] :from [|tree|] :where [= [id] node-id] :database (connection *current-clarity-handle*) ))))) (cond ((and (= (first children) 0) (= (second children) 0)) nil) ((= (first children) 0) (list (second children))) ((= (second children) 0) (list (first children))) (t children)) )) (defmethod get-node-terms ((clh clarity-handle) node-id) ;;node id is TREE id. may be either a consensus seq or real data (if (get-children node-id) ;;then it's a consensus (flatten (get-consensus-terms clh node-id)) ;;is actual data (flatten (get-data-terms clh (first (sql:with-transaction (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] node-id] :database (connection clh) :flatp t))))) ) ) (defmethod get-all-data ((clh clarity-handle)) (sql:with-transaction (sql:select [id] [datafile_path] [creation_date] :from [|timecourse_data|] :database (connection clh))) ) #.(sql:disable-sql-reader-syntax) ;;; end of file -- database-functions.lisp --