Google

Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

gnTranslator.cpp

Go to the documentation of this file.
00001 
00002 // File:            gnTranslator.cpp
00003 // Purpose:         Filter for all Sequences
00004 // Description:     translates, converts sequence
00005 // Changes:        
00006 // Version:         libGenome 0.1.0 
00007 // Author:          Aaron Darling 
00008 // Last Edited:     April 15, 2001, 11:13:00pm 
00009 // Modified by:     
00010 // Copyright:       (c) Aaron Darling 
00011 // Licenses:        Proprietary 
00013 #include "gn/gnTranslator.h"
00014 #include "gn/gnCompare.h"
00015 
00016 //      static data access, avoids static initialization order fiasco
00017 const gnTranslator *gnTranslator::ProteinDNATranslator(){
00018         const static gnTranslator* t_trans = new gnTranslator(ProteinDNATranslatorType);
00019         return t_trans;
00020 }
00021 const gnTranslator *gnTranslator::ProteinRNATranslator(){
00022         const static gnTranslator* t_trans = new gnTranslator(ProteinRNATranslatorType);
00023         return t_trans;
00024 }
00025 const gnTranslator *gnTranslator::DNAProteinTranslator(){
00026         const static gnTranslator* t_trans = new gnTranslator(DNAProteinTranslatorType);
00027         return t_trans;
00028 }
00029 const gnTranslator *gnTranslator::RNAProteinTranslator(){
00030         const static gnTranslator* t_trans = new gnTranslator(RNAProteinTranslatorType);
00031         return t_trans;
00032 }
00033 
00034 //      public:
00035 gnTranslator::gnTranslator()
00036 {
00037         use_default = false;
00038         m_defaultChar = 0;
00039         m_defaultInputWidth = 1;
00040 }
00041 
00042 gnTranslator::gnTranslator( const gnTranslator &sf )
00043 {
00044         m_name = sf.m_name;
00045         use_default = sf.use_default;
00046         m_defaultChar = sf.m_defaultChar;
00047         compare = sf.compare;
00048         m_inputTable = sf.m_inputTable;
00049         m_outputTable = sf.m_outputTable;
00050         m_defaultInputWidth = sf.m_defaultInputWidth;
00051 }
00052 gnTranslator::gnTranslator( gnTranslatorType t_type )
00053 {
00054         use_default = false;
00055         m_defaultChar = 0;
00056         switch(t_type){
00057                 case ProteinDNATranslatorType:
00058                         CreateProteinDNATranslator();
00059                         break;
00060                 case ProteinRNATranslatorType:
00061                         CreateProteinRNATranslator();
00062                         break;
00063                 case DNAProteinTranslatorType:
00064                         CreateDNAProteinTranslator();
00065                         break;
00066                 case RNAProteinTranslatorType:
00067                         CreateRNAProteinTranslator();
00068                         break;
00069         }
00070 }
00071 
00072         // gnSeqC 
00073 gnSeqC gnTranslator::Filter( const gnSeqC ch ) const{
00074         for(uint32 i=0; i < m_inputTable.size(); i++){
00075                 if(m_inputTable[i].length() == 1)
00076                         if(compare->Contains(m_inputTable[i][0], ch))
00077                                 return m_outputTable[i][0];
00078         }
00079         return m_defaultChar;
00080 }
00081 
00082 void gnTranslator::Filter( gnSeqC** seq, uint32& len ) const{
00083         uint32 curpos = 0;
00084         string output;
00085         while(curpos < len){
00086                 uint32 i=0;
00087                 for(; i < m_inputTable.size(); i++){
00088                         //don't compare if there aren't enough chars
00089                         uint32 curlen = m_inputTable[i].length();
00090                         if(len - curpos < curlen)
00091                                 continue;
00092                         if(compare->Contains(m_inputTable[i].data(), *seq + curpos, curlen)){
00093                                 output += m_outputTable[i];
00094                                 curpos += curlen;
00095                                 break;
00096                         }
00097                 }
00098                 if(i == m_inputTable.size()){
00099                         //no match was found.  
00100                         if(use_default)  //fill with the default char?
00101                                 output += m_defaultChar;
00102                         curpos += m_defaultInputWidth;
00103                 }
00104         }
00105         if(output.length() > len){
00106                 delete[] *seq;
00107                 *seq = new gnSeqC[output.length()];
00108         }
00109         len = output.length();
00110         memcpy(*seq, output.data(), len);
00111 }
00112         // string
00113 void gnTranslator::Filter( string &seq ) const{
00114         uint32 curpos = 0;
00115         uint32 len = seq.length();
00116         string output;
00117         while(curpos < len){
00118                 uint32 i=0;
00119                 for(; i < m_inputTable.size(); i++){
00120                         //don't compare if there aren't enough chars
00121                         uint32 curlen = m_inputTable[i].length();
00122                         if(len - curpos < curlen)
00123                                 continue;
00124                         if(compare->Contains(m_inputTable[i], seq.substr(curpos, curlen))){
00125                                 output += m_outputTable[i];
00126                                 curpos += curlen;
00127                                 break;
00128                         }
00129                 }
00130                 if(i == m_inputTable.size()){
00131                         //no match was found.  
00132                         if(use_default)  //fill with the default char?
00133                                 output += m_defaultChar;
00134                         curpos += m_defaultInputWidth;
00135                 }
00136         }
00137         seq = output;
00138 }
00139 
00140 // fill map
00141 void  gnTranslator::SetPair( const string& ch1, const string& ch2 )
00142 {
00143         if(ch1.length() == 0)
00144                 return; //cant have an empty input, empty output is ok
00145 
00146         m_inputTable.push_back(ch1);
00147         m_outputTable.push_back(ch2);
00148 }
00149 
00150 void gnTranslator::RemovePair( const string& ch )
00151 {
00152         for(uint32 i=0; i < m_inputTable.size(); i++){
00153                 if(m_inputTable[i] == ch){
00154                         m_inputTable.erase(m_inputTable.begin()+i);
00155                         m_outputTable.erase(m_outputTable.begin()+i);
00156                 }
00157         }
00158 }
00159 
00160 // standard translators
00161 void gnTranslator::CreateProteinDNATranslator(){
00162         SetName( "Protein to DNA Translator" );
00163         
00164         SetDefaultChar('X');
00165         SetCompare(gnCompare::ProteinSeqCompare());
00166         m_defaultInputWidth = 1;
00167         SetPair( "F", "TTY" );
00168         SetPair( "L", "YTX" );  //fix this somehow.  how?
00169         SetPair( "I", "ATH" );
00170         SetPair( "M", "ATG" );
00171         SetPair( "V", "GTX" );
00172         SetPair( "P", "CCX" );
00173         SetPair( "T", "ACX" );
00174         SetPair( "A", "GCX" );
00175         SetPair( "Y", "TAY" );
00176         SetPair( ".", "TRR" );//fix this somehow.  how?
00177         SetPair( "H", "CAY" );
00178         SetPair( "Q", "CAR" );
00179         SetPair( "N", "AAY" );
00180         SetPair( "K", "AAR" );
00181         SetPair( "D", "GAY" );
00182         SetPair( "E", "GAR" );
00183         SetPair( "C", "TGY" );
00184         SetPair( "W", "TGG" );
00185         SetPair( "G", "GGX" );
00186 
00187         SetPair( "S", "TCX" );
00188         SetPair( "S", "AGY");
00189         SetPair( "R", "CGX");
00190         SetPair( "R", "AGR");
00191 }
00192 
00193 void gnTranslator::CreateProteinRNATranslator(){
00194         SetName( "Protein to RNA Translator" );
00195         SetDefaultChar('X');
00196         SetCompare(gnCompare::ProteinSeqCompare());
00197         m_defaultInputWidth = 1;
00198 
00199         SetPair( "F", "UUY" );
00200         SetPair( "L", "YUX" );  //fix this somehow.  how?
00201         SetPair( "I", "AUH" );
00202         SetPair( "M", "AUG" );
00203         SetPair( "V", "GUX" );
00204         SetPair( "P", "CCX" );
00205         SetPair( "U", "ACX" );
00206         SetPair( "A", "GCX" );
00207         SetPair( "Y", "UAY" );
00208         SetPair( ".", "URR" );//fix this somehow.  how?
00209         SetPair( "H", "CAY" );
00210         SetPair( "Q", "CAR" );
00211         SetPair( "N", "AAY" );
00212         SetPair( "K", "AAR" );
00213         SetPair( "D", "GAY" );
00214         SetPair( "E", "GAR" );
00215         SetPair( "C", "UGY" );
00216         SetPair( "W", "UGG" );
00217         SetPair( "G", "GGX" );
00218 
00219         SetPair( "S", "UCX" );
00220         SetPair( "S", "AGY");
00221         SetPair( "R", "CGX");
00222         SetPair( "R", "AGR");
00223 }
00224 
00225 void gnTranslator::CreateDNAProteinTranslator(){
00226         SetName( "DNA to Protein Translator" );
00227         SetDefaultChar('X');
00228         SetCompare(gnCompare::DNASeqCompare());
00229         m_defaultInputWidth = 3;
00230         use_default = true;
00231         
00232         SetPair( "TTY", "F" );
00233         SetPair( "CTX", "L" );
00234         SetPair( "TTR", "L" );
00235         SetPair( "ATH", "I" );
00236         SetPair( "ATG", "M" );
00237         SetPair( "GTX", "V" );
00238         SetPair( "CCX", "P" );
00239         SetPair( "ACX", "T" );
00240         SetPair( "GCX", "A" );
00241         SetPair( "TAY", "Y" );
00242         SetPair( "TGG", "W" );
00243         SetPair( "TGA", "." );
00244         SetPair( "TAR", "." );
00245         SetPair( "CAY", "H" );
00246         SetPair( "CAR", "Q" );
00247         SetPair( "AAY", "N" );
00248         SetPair( "AAR", "K" );
00249         SetPair( "GAY", "D" );
00250         SetPair( "GAR", "E" );
00251         SetPair( "TGY", "C" );
00252         SetPair( "GGX", "G" );
00253 
00254         SetPair( "TCX", "S" );
00255         SetPair( "AGY", "S" );
00256         SetPair( "CGX", "R" );
00257         SetPair( "AGR", "R" );
00258         
00259         SetPair( "tty", "F" );
00260         SetPair( "ctx", "L" );
00261         SetPair( "ttr", "L" );
00262         SetPair( "ath", "I" );
00263         SetPair( "atg", "M" );
00264         SetPair( "gtx", "V" );
00265         SetPair( "ccx", "P" );
00266         SetPair( "acx", "T" );
00267         SetPair( "gcx", "A" );
00268         SetPair( "tay", "Y" );
00269         SetPair( "tgg", "W" );
00270         SetPair( "tga", "." );
00271         SetPair( "tar", "." );
00272         SetPair( "cay", "H" );
00273         SetPair( "car", "Q" );
00274         SetPair( "aay", "N" );
00275         SetPair( "aar", "K" );
00276         SetPair( "gay", "D" );
00277         SetPair( "gar", "E" );
00278         SetPair( "tgy", "C" );
00279         SetPair( "ggx", "G" );
00280 
00281         SetPair( "tcx", "S" );
00282         SetPair( "agy", "S" );
00283         SetPair( "cgx", "R" );
00284         SetPair( "agr", "R" );
00285 
00286 }
00287 
00288 void gnTranslator::CreateRNAProteinTranslator(){
00289         SetName( "RNA to Protein Translator" );
00290         SetDefaultChar('X');
00291         SetCompare(gnCompare::RNASeqCompare());
00292         m_defaultInputWidth = 3;
00293         use_default = true;
00294         
00295         SetPair( "UUY", "F" );
00296         SetPair( "CUX", "L" );
00297         SetPair( "UUR", "L" );
00298         SetPair( "AUH", "I" );
00299         SetPair( "AUG", "M" );
00300         SetPair( "GUX", "V" );
00301         SetPair( "CCX", "P" );
00302         SetPair( "ACX", "T" );
00303         SetPair( "GCX", "A" );
00304         SetPair( "UAY", "Y" );
00305         SetPair( "UGG", "W" );
00306         SetPair( "UGA", "." );
00307         SetPair( "UAR", "." );
00308         SetPair( "CAY", "H" );
00309         SetPair( "CAR", "Q" );
00310         SetPair( "AAY", "N" );
00311         SetPair( "AAR", "K" );
00312         SetPair( "GAY", "D" );
00313         SetPair( "GAR", "E" );
00314         SetPair( "UGY", "C" );
00315         SetPair( "GGX", "G" );
00316 
00317         SetPair( "UCX", "S" );
00318         SetPair( "AGY", "S" );
00319         SetPair( "CGX", "R" );
00320         SetPair( "AGR", "R" );
00321 
00322 
00323         SetPair( "uuy", "F" );
00324         SetPair( "cux", "L" );
00325         SetPair( "uur", "L" );
00326         SetPair( "auh", "I" );
00327         SetPair( "aug", "M" );
00328         SetPair( "gux", "V" );
00329         SetPair( "ccx", "P" );
00330         SetPair( "acx", "T" );
00331         SetPair( "gcx", "A" );
00332         SetPair( "uay", "Y" );
00333         SetPair( "ugg", "W" );
00334         SetPair( "uga", "." );
00335         SetPair( "uar", "." );
00336         SetPair( "cay", "H" );
00337         SetPair( "car", "Q" );
00338         SetPair( "aay", "N" );
00339         SetPair( "aar", "K" );
00340         SetPair( "gay", "D" );
00341         SetPair( "gar", "E" );
00342         SetPair( "ugy", "C" );
00343         SetPair( "ggx", "G" );
00344 
00345         SetPair( "ucx", "S" );
00346         SetPair( "agy", "S" );
00347         SetPair( "cgx", "R" );
00348         SetPair( "agr", "R" );
00349 }

Generated at Fri Nov 30 15:36:52 2001 for libGenome by doxygen1.2.8.1 written by Dimitri van Heesch, © 1997-2001