;;; -*- Mode: Lisp -*- #|CLARITY: Common Lisp Data Alignment Repository Copyright (c) 2006 Samantha Kleinberg All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA contact: Samantha AT Bioinformatics DOT nyu DOT edu 715 Broadway, 10th floor New York, NY 10003|# (in-package "CLARITY") (eval-when (:compile-toplevel :load-toplevel :execute) (require "sql") (require "odbc")) #.(sql:enable-sql-reader-syntax) (defparameter *gap* #\-) (defparameter *true* 1) (defparameter *false* 0) (defstruct (alignment (:constructor new-alignment)) (score) (items)) ;;type is pair of alignment items (defstruct (alignment-item (:constructor new-alignment-item)) (dataset-id) (chart-id) (term-name) (string)) (defgeneric align-charts (clh id_1 id_2) (:documentation "Aligns two datasets, given their timecourse data ids. arguments: clarity-handle timecourse-id-1 timecourse-id-2 usage: (clarity:align-charts clarity-handle timecourse-id-1 timecourse-id-2) notes: Returns hash table of alignments hashed by score" )) (defgeneric score-pair (clh id_1 id_2) (:documentation "Given two timecourse data ids, computes their alignment and returns score computed from hash table arguments: clarity-handle timecourse-data-id-1 timecourse-data-id-2 usage: (clarity:score-pair clarity-handle timecourse-data-id-1 timecourse-data-id-2) notes: Returns single numerical value" )) (defgeneric align-to-consensus (clh timecourse-id consensus-id) (:documentation "Aligns timecourse data to consensus data" )) (defgeneric string-alignment (string1 string2) (:documentation "Given two strings, performs dynamic programming alignment and returns the alignment score arguments: string1 string2 usage: (clarity:string-alignment ""UUDDUI"" ""IUDDDI"")" )) (defgeneric score (char1 char2) (:documentation "Given two characters, this function returns the numerical value associated with alignign one to the other arguments: character1 character2 usage (clarity:score U D)" )) (defgeneric get-root (clh) (:documentation "Returns root of phylogenetic tree. arguments: clarity-handle usage: (clarity:get-root clarity-handle) notes: returns id number of root node." )) ;;right now, choose charts based on data ids (defmethod align-charts ((clh clarity-handle) id_1 id_2) ;;get intersection of datasets (let* ((alignment-hash (make-hash-table :test #'equal)) (intersection-list (sql:select [fir.term_name] [fir.timecourse_data_id] [fir.id] [fir.sequence] [sec.timecourse_data_id] [sec.id] [sec.sequence] :from '(#.(sql::make-db-identifier :val "gantt_chart FIR") #.(sql::make-db-identifier :val "gantt_chart SEC")) :where [and [= ["FIR" timecourse_data_id] id_1] [= ["SEC" timecourse_data_id] id_2] [= ["FIR" term_name] ["SEC" term_name]]] :database (connection clh) )) (alignment-items (loop for (term time_id_1 first_id first_sequence time_id_2 second_id second_sequence) in intersection-list collecting (list (new-alignment-item :dataset-id time_id_1 :chart-id first_id :term-name term :string first_sequence) (new-alignment-item :dataset-id time_id_2 :chart-id second_id :term-name term :string second_sequence)) ))) (loop for value in (loop for (left right) in alignment-items collecting (new-alignment :score (string-alignment (alignment-item-string left) (alignment-item-string right)) :items (list left right)) ) do (push value (gethash (alignment-score value) alignment-hash)) finally (return alignment-hash)) )) (defmethod score-pair ((clh clarity-handle) id_1 id_2) (let ((hash (align-charts clh (first (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] id_1] :database (connection clh)))) (first (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] id_2] :database (connection clh))))))) (loop for key being the hash-keys of hash with alignmentSum=0 summing (* key (length (gethash key hash))) into alignmentSum finally (return alignmentSum) )) ) (defmethod align-to-consensus ((clh clarity-handle) timecourse-id consensus-id) ;;get intersection of datasets (let* ((alignment-hash (make-hash-table :test #'equal)) (intersection-list (sql:select [fir.term_name] [fir.timecourse_data_id] [fir.id] [fir.sequence] [sec.tree_id] [sec.id] [sec.sequence] :from '(#.(sql::make-db-identifier :val "gantt_chart FIR") #.(sql::make-db-identifier :val "consensus SEC")) :where [and [= ["FIR" timecourse_data_id] timecourse-id] [= ["SEC" tree_id] consensus-id] [= ["FIR" term_name] ["SEC" term_name]]] :database (connection clh) )) (alignment-items (loop for (term time_id first_id first_sequence tree_id second_id second_sequence) in intersection-list collecting (list (new-alignment-item :dataset-id time_id :chart-id first_id :term-name term :string first_sequence) (new-alignment-item :dataset-id tree_id :chart-id second_id :term-name term :string second_sequence)) ))) (loop for value in (loop for (left right) in alignment-items collecting (new-alignment :score (string-alignment (alignment-item-string left) (alignment-item-string right)) :items (list left right)) ) do (push value (gethash (alignment-score value) alignment-hash)) finally (return alignment-hash)) )) (defmethod score-pair-consensus ((clh clarity-handle) timecourse-id consensus-id) (let ((hash (align-to-consensus clh (first (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] timecourse-id] :database (connection clh)))) consensus-id))) (loop for key being the hash-keys of hash with alignmentSum=0 summing (* key (length (gethash key hash))) into alignmentSum finally (return alignmentSum) ) ) ) (defmethod score (char1 char2) (declare (type character char1 char2)) (cond ((char= char1 char2) 3) ;((char= char1 *gap*) -1) ;((char= char2 *gap*) -1) ((and (char= char1 #\U) (char= char2 #\D)) -2) ((and (char= char1 #\D) (char= char2 #\U)) -2) (t -1) )) (defmethod string-alignment (string1 string2) (let* ((l1 (length string1)) (l2 (length string2)) (table (make-array (list (+ l1 1) (+ l2 1)) :initial-element 0))) ;;initialize table (loop for i from 1 to l1 do (setf (aref table i 0) (+ (aref table (- i 1) 0) (score (char string1 (- i 1)) *gap*))) ) (loop for k from 1 to l2 do (setf (aref table 0 k) (+ (aref table 0 (- k 1)) (score *gap* (char string2 (- k 1)))))) ;;fill in rest of table and return score in lower right corner (loop for k from 1 to l2 do (loop for i from 1 to l1 do (setf (aref table i k) (max (+ (aref table (- i 1) k) (score (char string1 (- i 1)) *gap*)) (+ (aref table (- i 1) (- k 1)) (score (char string1 (- i 1)) (char string2 (- k 1)))) (+ (aref table i (- k 1)) (score *gap* (char string2 (- k 1))))))) finally (return (aref table l1 l2 ))) )) (defmethod get-root ((clh clarity-handle)) ;;returns tree id of root node (first (sql:with-transaction (sql:select [id] :from [|tree|] :where [= [is_root] 1] :database (connection clh) :flatp t)))) (defmethod tree-insert ((clh clarity-handle) timecourse_data_id) (let ((root (get-root clh))) (if root ;there is a root (let ((new-node-id)) ;;make node for new-node (sql:with-transaction (sql:insert-records :into [|tree|] :attributes '([|timecourse_data_id|] [|node_left|] [|node_right|]) :values (list timecourse_data_id 0 0) :database (connection clh))) (setf new-node-id (lastid clh :table "tree")) (setf *new-best-node* 0) (setf *new-best-score* 0) (multiple-value-bind (best-node best-score) (tree-insert-recursive clh new-node-id root) ;;if there is no best node, insert as sibling of root (insert-node clh new-node-id (cond ((= best-node 0) root) (t best-node))))) ;no root, data to insert becomes root. this is the base case (sql:with-transaction (sql:insert-records :into [|tree|] :attributes '([|timecourse_data_id|] [|is_root|] [|node_left|] [|node_right|]) :values (list timecourse_data_id *true* 0 0) :database (connection clh))) ) )) ;;node referred to by node-id. if node has no children, it has timecourse data. ;; if it's an internal node, it has simply a consensus sequence" (defvar *new-best-node*) (defvar *new-best-score*) (defmethod tree-insert-recursive ((clh clarity-handle) new-node current-node) #| (if (eq depth 0) (score-pair clh new-node current-node) )|# (destructuring-bind (left right) (first (sql:select [node_left] [node_right] :from [|tree|] :where [= [id] current-node] :database (connection clh))) (let ((score (cond ((and (= 0 left) (= 0 right)) ;;if leaf node, use reg-score-pair (score-pair clh new-node current-node)) (t (score-pair-consensus clh new-node current-node)))) ; (new-best-node best-node) ; (new-best-score best-score)) ) (when (> score *new-best-score*) (setf *new-best-node* current-node) (setf *new-best-score* score) ) (if (> left 0) (tree-insert-recursive clh new-node left)) (if (> right 0) (tree-insert-recursive clh new-node right)) (values *new-best-node* *new-best-score*) ))) (defmethod insert-node ((clh clarity-handle) new-node-id sibling-node-id) (let ((new-parent-id) (new-node-data-id (first (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] new-node-id] :database (connection clh))))) (new-consensus)) (destructuring-bind (sib-left sib-right sib-data-id is-root) (first (sql:select [node_left] [node_right][timecourse_data_id][is_root] :from [|tree|] :where [= [|id|] sibling-node-id] :database (connection clh) )) (when is-root ;;if sibling was root, it no longer is, then parent becomes root (sql:update-records [|tree|] :where [= [|id|] sibling-node-id] :attributes '([|is_root|]) :values '(0) :database (connection clh)) ) ;;make new-node for parent (sql:with-transaction (sql:insert-records :into [|tree|] :attributes '([|timecourse_data_id|] [|node_left|] [|node_right|][is_root]) :values (list 0 sibling-node-id new-node-id is-root) :database (connection clh))) (setf new-parent-id (lastid clh :table "tree")) ;;update parent's consensus seq with it's id ;;actualyl consensus id IS node id! ;;update sibling's original parent ;;could have been a left child or a right child. try both. ;;get id to update old parent's consensus (let ((old-parent-id (or (first (sql:with-transaction (sql:select [|id|] :from [|tree|] :where [and [= [node_left] sibling-node-id] [not [= [id] new-parent-id]]] :database (connection clh) :flatp t))) (first (sql:with-transaction (sql:select [|id|] :from [|tree|] :where [and [= [node_right] sibling-node-id] [not [= [id] new-parent-id]]] :database (connection clh) :flatp t)))))) (sql:with-transaction (sql:update-records [|tree|] :where [and [= [node_left] sibling-node-id] [= [id] old-parent-id]] :attributes '([|node_left|]) :values (list new-parent-id) :database (connection clh))) (sql:with-transaction (sql:update-records [|tree|] :where [and [= [node_right] sibling-node-id] [= [id] old-parent-id]] :attributes '([|node_right|]) :values (list new-parent-id) :database (connection clh))) ;;build consensus sequence ;;when buildng consensus: right is always real data, since new. left may or may not be consensus (build-consensus clh new-parent-id sibling-node-id new-node-data-id) (when old-parent-id ;;after making consensus sequence, for new node, need to update parent of new node's consensus! ;;delete old consensus from table (sql:with-transaction (sql:delete-records :from [|consensus|] :where [= [tree_id] old-parent-id] :database (connection clh))) ;;build consensus to replace old (build-internal-consensus clh old-parent-id)) )))) (defvar *test*) (defvar *test-list*) (defmethod build-consensus ((clh clarity-handle) node-id data-left data-right) ;;right is always timecourse, left may be either (destructuring-bind (data-id) (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] data-left] :database (connection clh))) (let ((term-seq-list (cond ((> data-id 0) ;;if left is timecourse_data (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD") #.(sql::make-db-identifier :val "gantt_chart RCHILD")) :where [and [= ["LCHILD" timecourse_data_id] data-id] [= ["RCHILD" timecourse_data_id] data-right] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) ; ((> consensus-id 0) ;;left is consensus data (t (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "consensus LCHILD") #.(sql::make-db-identifier :val "gantt_chart RCHILD")) :where [and [= ["LCHILD" tree_id] data-left] [= ["RCHILD" timecourse_data_id] data-right] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) #|(t (print "error!!!"))|# ))) (setf *test-list* term-seq-list) (when term-seq-list (loop for (term seq) in term-seq-list do(store-consensus-sequence clh node-id term seq) )) ))) (defmethod build-internal-consensus ((clh clarity-handle) node-id) ;;at least one node is consensus, other may be either (destructuring-bind (data-left data-right) (first (sql:select [|node_left|] [|node_right|] :from [|tree|] :where [= [|id|] node-id] :database (connection clh))) (let ((left-data-id (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] data-left] :database (connection clh) :flatp t))) (right-data-id (first (sql:select [timecourse_data_id] :from [|tree|] :where [= [id] data-right] :database (connection clh) :flatp t)))) (let ((term-seq-list (cond ((and (> left-data-id 0) (> right-data-id 0)) ;;if left is timecourse_data (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD") #.(sql::make-db-identifier :val "gantt_chart RCHILD")) :where [and [= ["LCHILD" timecourse_data_id] left-data-id] [= ["RCHILD" timecourse_data_id] right-data-id] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) ((and (> left-data-id 0) (= right-data-id 0)) ;;if left is timecourse_data (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD") #.(sql::make-db-identifier :val "consensus RCHILD")) :where [and [= ["LCHILD" timecourse_data_id] left-data-id] [= ["RCHILD" tree_id] data-right] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) ((and (= left-data-id 0) (= right-data-id 0)) ;;if left is timecourse_data (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "consensus LCHILD") #.(sql::make-db-identifier :val "consensus RCHILD")) :where [and [= ["LCHILD" tree_id] data-left] [= ["RCHILD" tree_id] data-right] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) (t (sql:select [lchild.term_name] [lchild.sequence] :from '(#.(sql::make-db-identifier :val "consensus LCHILD") #.(sql::make-db-identifier :val "gantt_chart RCHILD")) :where [and [= ["LCHILD" tree_id] data-left] [= ["RCHILD" timecourse_data_id] right-data-id] [= ["LCHILD" term_name] ["RCHILD" term_name]] [= ["LCHILD" sequence] ["RCHILD" sequence]]] :database (connection clh))) ))) term-seq-list (when term-seq-list (loop for (term seq) in term-seq-list do(store-consensus-sequence clh node-id term seq) )) )))) (defmethod store-consensus-sequence ((clh clarity-handle) node-id term-name sequence) (sql:with-transaction (sql:insert-records :into [|consensus|] :attributes '([|tree_id|] [|term_name|] [|sequence|]) :values (list node-id term-name sequence) :database (connection clh))) ) (defmethod get-consensus-terms ((clh clarity-handle) node-id) ;;node-id is tree id (sql:with-transaction (sql:select [term_name] :from [|consensus|] :where [= [tree_id] node-id] :database (connection clh))) ) (defmethod get-data-terms ((clh clarity-handle) data-id) (sql:with-transaction (sql:select [term_name] :from [|gantt_chart|] :where [= [timecourse_data_id] data-id] :database (connection clh))) ) #.(sql:disable-sql-reader-syntax) ;;; end of file -- alignment-functions.lisp --