    /* GFFDocumentEntry.java
     *
     * created: Tue Sep 14 1999
     *
     * This file is part of Artemis
     *
     * Copyright (C) 1999-2005  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFDocumentEntry.java,v 1.69 2009-09-03 13:47:31 tjc Exp $
     */
    
    package uk.ac.sanger.artemis.io;
    
    
    import uk.ac.sanger.artemis.chado.FeatureLocLazyQualifierValue;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.components.Splash;
    
    import uk.ac.sanger.artemis.components.filetree.LocalAndRemoteFileManager;
    
    import uk.ac.sanger.artemis.components.genebuilder.GeneUtils;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.util.*;
    
    
    tjc's avatar
    tjc committed
    import java.io.IOException;
    
    tjc's avatar
    tjc committed
    import java.util.Hashtable;
    import java.util.Enumeration;
    
    tjc's avatar
    tjc committed
    import java.util.Iterator;
    import java.util.List;
    import java.util.Set;
    
    import java.util.Vector;
    
    import java.sql.Timestamp;
    
    tjc's avatar
    tjc committed
    
    
    import org.gmod.schema.cv.CvTerm;
    
    import org.gmod.schema.sequence.FeatureLoc;
    
    tjc's avatar
    tjc committed
    /**
     *  A DocumentEntry that can read a GFF entry from a Document.
     *
     *  @author Kim Rutherford
     *  @version $Id: GFFDocumentEntry.java,v 1.69 2009-09-03 13:47:31 tjc Exp $
     **/
    
    public class GFFDocumentEntry extends SimpleDocumentEntry
    
    tjc's avatar
    tjc committed
        implements DocumentEntry 
    {
    
    tjc's avatar
    tjc committed
      /** Set to true as the last statement of the constructors of this class. */
      private boolean finished_constructor = false;
    
    tjc's avatar
    tjc committed
      /** Read-only flag returned by isReadOnly() and changed by setReadOnly(). */
      private boolean isReadOnly = false;
    
    tjc's avatar
    tjc committed
      
    
    tjc's avatar
    tjc committed
      /**
       *  Create a new GFFDocumentEntry object associated with the given
       *  Document.  After the features have been read they are post-processed:
       *  when the first feature is reported as GTF by GFFStreamFeature.isGTF()
       *  mergeGtfFeatures() is applied for the "CDS" and "exon" keys, otherwise
       *  combineGeneFeatures() is applied.
       *  @param document This is the file that we will read from.  This is also
       *    used for saving the entry back to the file it came from and to give
       *    the new object a name.
       *  @param listener The object that will listen for ReadEvents.
       *  @exception IOException thrown if there is a problem reading the entry -
       *    most likely ReadFormatException.
       *  @exception EntryInformationException propagated from the calls made
       *    while the entry is being built.
       **/
      GFFDocumentEntry(final Document document, final ReadListener listener)
          throws IOException, EntryInformationException 
      {
        super(new GFFEntryInformation(), document, listener);

        // join the separate exons into one feature (if appropriate)
        final FeatureVector features = getAllFeatures();

        // the first feature decides whether the whole entry is handled as GTF
        final boolean gtfInput = features.size() > 0 &&
            GFFStreamFeature.isGTF((Feature)features.get(0));

        if(gtfInput)
        {
          // GTF
          final String[] gtfKeys = { "CDS", "exon" };
          for(int i = 0; i < gtfKeys.length; i++)
            mergeGtfFeatures(features, gtfKeys[i]);
        }
        else
        {
          // GFF
          combineGeneFeatures(features);
        }

        finished_constructor = true;
      }
    
      /**
       *  Create a new GFFDocumentEntry that will be a copy of the given Entry and
       *  has no Document associated with it.  The new GFFDocumentEntry cannot be
       *  saved to a file with save() unless save(Document) has been called
       *  first.  Some qualifier and location information will be lost.
       *  @param new_entry The Entry to copy.
       *  @param force If true then invalid qualifiers and any features with
       *    invalid keys in the new Entry will be quietly thrown away.  "Invalid"
       *    means that the key/qualifier is not allowed to occur in an Entry of
       *    this type (probably determined by the EntryInformation object of this
       *    Entry).  If false an EntryInformationException will be thrown for
       *    invalid keys or qualifiers.
       *  @exception EntryInformationException see the description of force.
       **/
      public GFFDocumentEntry(final Entry new_entry, final boolean force)
          throws EntryInformationException 
      {
        // the copy always uses GFF entry information
        super(new GFFEntryInformation(), new_entry, force);
        this.finished_constructor = true;
      }
    
      /**
       *  Create a new empty GFFDocumentEntry object that has no Document
       *  associated with it.  The new GFFDocumentEntry cannot be saved to a
       *  file with save() unless save(Document) has been called first.  The
       *  save(Document) method will assign a Document.
       *  @param entry_information Not used by this constructor: a new
       *    GFFEntryInformation object is always passed to the superclass.
       **/
      public GFFDocumentEntry(final EntryInformation entry_information) 
      {
        // entry_information is deliberately not forwarded (see above)
        super(new GFFEntryInformation());
        this.finished_constructor = true;
      }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      /**
       *  Returns true if and only if this entry is read only.  For now this
       *  always returns true - GFFDocumentEntry objects can't be changed.
       **/
    
    tjc's avatar
    tjc committed
      /**
       *  Returns true if and only if this entry is read only.  For now this
       *  always returns true - BlastDocumentEntry objects can't be changed.
       **/
    
    tjc's avatar
    tjc committed
      public boolean isReadOnly() 
      {
    
    tjc's avatar
    tjc committed
        return isReadOnly;
      }
      
      public void setReadOnly(final boolean isReadOnly)
      {
        this.isReadOnly = isReadOnly;
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  If the given feature can be added directly to this Entry, then return
       *  it, otherwise create and return a new feature of the appropriate type.
       *  @param copy if true then always new a new copy of the Feature.
       **/
    
      protected Object makeNativeFeature(final Feature feature,
                                         final boolean copy) 
    
    tjc's avatar
    tjc committed
      {
        if(!copy && feature instanceof GFFStreamFeature) 
          return (GFFStreamFeature)feature;
        else 
    
    tjc's avatar
    tjc committed
        {
          if(PublicDBDocumentEntry.IGNORE_OBSOLETE_FEATURES)
          {
            Qualifier isObsoleteQualifier = 
              feature.getQualifiers().getQualifierByName("isObsolete");
            if(isObsoleteQualifier != null)
            {
              String value = (String)isObsoleteQualifier.getValues().get(0);
              if(Boolean.parseBoolean(value))
                return null;
            }
          }
    
    tjc's avatar
    tjc committed
          return new GFFStreamFeature(feature);
    
    tjc's avatar
    tjc committed
        }
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  If the given Sequence can be added directly to this Entry, then return a
       *  copy of it, otherwise create and return a new feature of the appropriate
       *  type for this Entry.
       **/
    
    tjc's avatar
    tjc committed
      protected StreamSequence makeNativeSequence(final Sequence sequence)
      {
        return new FastaStreamSequence(sequence);
    
    tjc's avatar
    tjc committed
      }
    
    
    tcarver's avatar
    tcarver committed
      private void combineGeneFeatures(FeatureVector original_features)
    
    tjc's avatar
    tjc committed
      {
        Feature this_feature;
        Hashtable chado_gene = new Hashtable();
        try
        {
          // find the genes
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
    
            final String key = this_feature.getKey().getKeyString();
    
    tjc's avatar
    tjc committed
            if(this_feature instanceof GFFStreamFeature &&
               (GeneUtils.isHiddenFeature(key) ||
                GeneUtils.isObsolete((GFFStreamFeature)this_feature)))
    
              ((GFFStreamFeature)this_feature).setVisible(false);
    
    tjc's avatar
    tjc committed
            
    
            if(key.equals("gene") || key.equals("pseudogene"))
    
    tjc's avatar
    tjc committed
            {
    
    tjc's avatar
    tjc committed
              final Qualifier idQualifier = this_feature.getQualifierByName("ID");
              if(idQualifier != null)
              {
                String id = (String)this_feature.getQualifierByName("ID").getValues().get(0);
                ChadoCanonicalGene gene = new ChadoCanonicalGene();
                gene.setGene(this_feature);
                chado_gene.put(id, gene);
                ((GFFStreamFeature)this_feature).setChadoGene(gene);
              }
    
    tjc's avatar
    tjc committed
            }
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          // find the transcripts
    
    tjc's avatar
    tjc committed
          Hashtable transcripts_lookup = new Hashtable();
    
    tjc's avatar
    tjc committed
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
            
            // transcript 
            Qualifier parent_qualifier = this_feature.getQualifierByName("Parent");
            
            if(parent_qualifier == null)
              continue;
    
            StringVector parents = parent_qualifier.getValues();
            for(int j=0; j<parents.size(); j++)
            {
              String parent = (String)parents.get(j);
              
              if(chado_gene.containsKey(parent))
              {
                // store transcript
                ChadoCanonicalGene gene = (ChadoCanonicalGene)chado_gene.get(parent);
                gene.addTranscript(this_feature);
    
                ((GFFStreamFeature)this_feature).setChadoGene(gene);
    
    tjc's avatar
    tjc committed
                
                // store the transcript ID with its ChadoCanonicalGene object
                transcripts_lookup.put((String)this_feature.getQualifierByName("ID").getValues().get(0),
                                       gene);
    
    tjc's avatar
    tjc committed
                continue;
              }
            }
          }
          
    
    tjc's avatar
    tjc committed
          
    
    tjc's avatar
    tjc committed
          // find exons & protein
    
    tjc's avatar
    tjc committed
          String key;
    
    tjc's avatar
    tjc committed
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
            // exons
    
    tjc's avatar
    tjc committed
            key = this_feature.getKey().getKeyString();
    
    tcarver's avatar
    tcarver committed
    
    
            final Qualifier parent_qualifier  = this_feature.getQualifierByName("Parent");
            final Qualifier derives_qualifier = this_feature.getQualifierByName("Derives_from");
    
    tjc's avatar
    tjc committed
            if(parent_qualifier == null && derives_qualifier == null)
    
    tjc's avatar
    tjc committed
              continue;    
              
    
            final Qualifier featureRelationship = 
    
    tjc's avatar
    tjc committed
              this_feature.getQualifierByName("feature_relationship_rank");
    
    tjc's avatar
    tjc committed
            // compare this features parent_id's to transcript id's in the 
            // chado gene hash to decide if it is part of it
            final StringVector parent_id;
            
            if(parent_qualifier != null)
              parent_id = parent_qualifier.getValues();
            else
              parent_id = derives_qualifier.getValues();
            
    
    tjc's avatar
    tjc committed
            for(int j=0; j<parent_id.size(); j++)
            {
    
              final String parent = (String)parent_id.get(j);
    
    tjc's avatar
    tjc committed
             
    
    tjc's avatar
    tjc committed
              if(transcripts_lookup.containsKey(parent))
              {
    
                final ChadoCanonicalGene gene = (ChadoCanonicalGene)transcripts_lookup.get(parent);
                ((GFFStreamFeature)this_feature).setChadoGene(gene);
                
    
    tjc's avatar
    tjc committed
                if(parent_qualifier == null)
    
    tjc's avatar
    tjc committed
                {
    
                  //((GFFStreamFeature)this_feature).setVisible(false);
    
    tjc's avatar
    tjc committed
                  gene.addProtein(parent, this_feature);
    
    tjc's avatar
    tjc committed
                }
    
    tjc's avatar
    tjc committed
                else if(key.equals("three_prime_UTR"))
                  gene.add3PrimeUtr(parent, this_feature);
                else if(key.equals("five_prime_UTR"))
                  gene.add5PrimeUtr(parent, this_feature);
    
                else if(key.equals(DatabaseDocument.EXONMODEL) || key.equals("exon") || 
                        featureRelationship != null ||
    
                        key.equals("pseudogenic_exon"))
    
    tjc's avatar
    tjc committed
                  gene.addSplicedFeatures(parent, this_feature);
    
    tjc's avatar
    tjc committed
                else
                  gene.addOtherFeatures(parent, this_feature);
    
    tjc's avatar
    tjc committed
          }
    
    tjc's avatar
    tjc committed
      
    
          if(getDocument() instanceof DatabaseDocument)
    
          {
            DatabaseDocument doc = (DatabaseDocument)getDocument();
    
    tjc's avatar
    tjc committed
            loadFeatureLocLazyData(original_features); 
            if(doc.isLazyFeatureLoad())
    
            {
              // using lazy loading - add the lazy chado feature to GFFStreamFeature
              final Hashtable idFeatureStore = doc.getIdFeatureStore();
              for(int i = 0 ; i < original_features.size() ; ++i) 
              {
                this_feature = original_features.featureAt(i);
                String featureId = (String) this_feature.getQualifierByName("feature_id").getValues().get(0);
                org.gmod.schema.sequence.Feature chadoLazyFeature = 
                  (org.gmod.schema.sequence.Feature)idFeatureStore.get(featureId);
                ((GFFStreamFeature)this_feature).setChadoLazyFeature(chadoLazyFeature);
              }
              idFeatureStore.clear();
            }
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          // now join exons
    
    tjc's avatar
    tjc committed
          //combineFeatures();
    
    tjc's avatar
    tjc committed
          Enumeration enum_genes = chado_gene.elements();
          while(enum_genes.hasMoreElements())
          {
            ChadoCanonicalGene gene = (ChadoCanonicalGene)enum_genes.nextElement();
            combineChadoExons(gene);
    
    tcarver's avatar
    tcarver committed
    
    
            // inferring CDS and UTRs
            if(DatabaseDocument.CHADO_INFER_CDS)
            {
              final Vector transcripts = (Vector)gene.getTranscripts();
              gene.correctSpliceSiteAssignments();
              
              for(int i=0; i<transcripts.size(); i++)
              {
                GFFStreamFeature transcript = (GFFStreamFeature)transcripts.get(i);
                String transcript_id = null;
                transcript_id = GeneUtils.getUniqueName(transcript);
                
                List exons = gene.getSpliceSitesOfTranscript(transcript_id, "exon");
                if(exons == null)
                  continue;
                
                Iterator it = exons.iterator();
                while(it.hasNext())
                {
                  final GFFStreamFeature exonFeature = (GFFStreamFeature)it.next();
    
                  QualifierVector qualifiers = new QualifierVector();
                  qualifiers.add(new Qualifier("ID", transcript_id+":CDS"));
                  qualifiers.add(new Qualifier("Parent", transcript_id));
                  
                  DatabaseInferredFeature cdsFeature = new DatabaseInferredFeature(
                      Key.CDS, exonFeature.getLocation(), qualifiers, gene);
                  
                  try
                  {
                    gene.addSplicedFeatures(transcript_id, cdsFeature);
                    forcedAdd(cdsFeature);
                  }
                  catch (ReadOnlyException e)
                  {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                  }
                  
                }
              }
            }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        }
        catch(InvalidRelationException e)
        {
          e.printStackTrace();
        }
      }
      
    
    tjc's avatar
    tjc committed
      /**
    
       * Get 'similarity', polypeptide_domain qualifiers
    
    tjc's avatar
    tjc committed
       * @param fv
       * @throws InvalidRelationException
       */
    
      private void loadFeatureLocLazyData(final FeatureVector fv)
    
    tjc's avatar
    tjc committed
              throws InvalidRelationException
      {
    
        final DatabaseDocument doc = (DatabaseDocument)getDocument();
    
        if(fv.size() < 30 && fv.size() > 0)  // if just a few features to look up e.g. for gene editor
    
        {
          List featureIds = new Vector(fv.size());
          for(int i=0;i<fv.size(); i++)
          {
            Qualifier featureIdQualifier = fv.featureAt(i).getQualifierByName("feature_id");
            featureIds.add( (String)featureIdQualifier.getValues().get(0) );
          }
          matches = doc.getSimilarityMatches(featureIds);
        }
        else
          matches = doc.getSimilarityMatches(null);
        
    
        if(matches == null || matches.size() < 1)
          return;
    
        final Hashtable temp_lookup_hash = new Hashtable(matches.size()/2);
    
    tjc's avatar
    tjc committed
        String f_id;
        for(int i=0; i<fv.size(); i++)
        {
          Feature f = (Feature)fv.elementAt(i);
          Qualifier qualifier = ((Feature)f).getQualifierByName("feature_id");
          
          if(qualifier != null)
          {
            f_id = (String)qualifier.getValues().get(0);
            temp_lookup_hash.put(f_id, f);
          }
        }
        
    
    tjc's avatar
    tjc committed
        // bulk load featureLocs and create lookup hash 
    
        final Hashtable hashFeatureLocs = getFeatureLocsHash(doc, matches);
        if(hashFeatureLocs == null)
          return;
        
    
        final Hashtable cvTermCache = new Hashtable();
    
    tjc's avatar
    tjc committed
        final Feature f = (Feature)fv.elementAt(0);
    
    tjc's avatar
    tjc committed
        for(int i=0; i<matches.size(); i++)
        {
          org.gmod.schema.sequence.Feature matchFeature =
               (org.gmod.schema.sequence.Feature)matches.get(i);
          
    
    tjc's avatar
    tjc committed
          final CvTerm cvTerm = getCvTermFromCache(cvTermCache, matchFeature, f);
    
          final String qualifierName;
          if(cvTerm.getName().indexOf("match") > -1)
            qualifierName = "similarity";
          else
            qualifierName = cvTerm.getName();
          
    
          final List featureLocs = 
            (List) hashFeatureLocs.get(new Integer(matchFeature.getFeatureId()));
          if(featureLocs == null)
            continue;
          
          matchFeature.setFeatureLocsForFeatureId(featureLocs);
          //java.util.Collection featureLocs = matchFeature.getFeatureLocsForFeatureId();
    
    tjc's avatar
    tjc committed
          java.util.Iterator it = featureLocs.iterator();
          while(it.hasNext())
          {
    
    tjc's avatar
    tjc committed
            final FeatureLoc featureLoc = (FeatureLoc)it.next();
    
            final Feature queryFeature = 
    
    tjc's avatar
    tjc committed
              (Feature)temp_lookup_hash.get(Integer.toString(featureLoc.getSrcFeatureId()));
            
            if(queryFeature != null)
    
    tjc's avatar
    tjc committed
            {
    
              Qualifier qualifier = queryFeature.getQualifierByName(qualifierName);
              final FeatureLocLazyQualifierValue sim = 
                new FeatureLocLazyQualifierValue(matchFeature, featureLoc.getSrcFeatureId());
    
    tjc's avatar
    tjc committed
              
              if(qualifier == null || !(qualifier instanceof QualifierLazyLoading))
    
                qualifier = new QualifierLazyLoading(qualifierName, sim);
    
    tjc's avatar
    tjc committed
              else
                ((QualifierLazyLoading)qualifier).addValue(sim);
    
              try
              {
                queryFeature.setQualifier(qualifier);
              }
              catch(ReadOnlyException e)
              {
                e.printStackTrace();
              }
              catch(EntryInformationException e)
              {
                e.printStackTrace();
              }
    
              
              if(qualifierName.equals("polypeptide_domain") && 
                 LocalAndRemoteFileManager.domainLoad.isSelected()) 
                addDomain(queryFeature, featureLoc, matchFeature);
    
    
        cvTermCache.clear();
    
    tjc's avatar
    tjc committed
        temp_lookup_hash.clear(); 
      }
      
    
    tjc's avatar
    tjc committed
      /**
       * Look up the CvTerm of a match feature, using the given Hashtable as a
       * cache keyed by cvterm_id.  On a cache miss the term is fetched with
       * DatabaseDocument.getCvTermByCvTermId() and stored in the cache.  The
       * resulting term is also set on the match feature.
       * @param cvTermCache  cache of cvterm_id (Integer) to CvTerm
       * @param matchFeature the match feature whose CvTerm is wanted
       * @param f            feature handed to the DatabaseDocument lookup
       * @return the CvTerm for the match feature
       */
      private CvTerm getCvTermFromCache(final Hashtable cvTermCache,
                                        final org.gmod.schema.sequence.Feature matchFeature,
                                        final Feature f)
      {
        final Integer cacheKey = Integer.valueOf(matchFeature.getCvTerm().getCvTermId());

        // a Hashtable cannot hold null values, so null here means "not cached"
        CvTerm term = (CvTerm) cvTermCache.get(cacheKey);
        if(term == null)
        {
          term = DatabaseDocument.getCvTermByCvTermId(
              matchFeature.getCvTerm().getCvTermId(), f);
          cvTermCache.put(cacheKey, term);
        }

        matchFeature.setCvTerm(term);
        return term;
      }
      
    
      /**
       * Add domain features as read-only features
       * @param queryFeature
       * @param featureLoc
       * @param matchFeature
       */
      private void addDomain(final Feature queryFeature, 
                             final FeatureLoc featureLoc,
                             org.gmod.schema.sequence.Feature matchFeature)
      {
        try
        {
          int start = queryFeature.getLocation().getFirstBase();
    
    tcarver's avatar
    tcarver committed
          Location location = null;
    
          
          ChadoCanonicalGene chadoGene = ((GFFStreamFeature)queryFeature).getChadoGene();
          if(chadoGene != null)
            location = chadoGene.getNucLocation(queryFeature, featureLoc);
          else if(queryFeature.getLocation().isComplement())
            location = new Location("complement("+
                (start+(featureLoc.getFmin()*3)+1)+".."+(start+(featureLoc.getFmax()*3))+")");
    
    tcarver's avatar
    tcarver committed
          if(location == null)
    
            location = new Location(
                (start+(featureLoc.getFmin()*3)+1)+".."+(start+(featureLoc.getFmax()*3)));
       
          final GFFStreamFeature newFeature = new GFFStreamFeature(
              new Key("polypeptide_domain"), 
              location, null);
          newFeature.setReadOnlyFeature(true);
          newFeature.setChadoLazyFeature(matchFeature);
          add(newFeature);
        }
        catch (Exception e)
        {
          e.printStackTrace();
        }
      }
      
    
    tjc's avatar
    tjc committed
    
    
      /**
       * Bulk load the featureLocs of the given match features and group them
       * in a Hashtable, with the feature_id (Integer) of the owning feature as
       * key and the list of its featureLocs as value.
       * @param doc     the document used to fetch the featureLocs
       * @param matches the match features
       * @return the hashtable; null if no featureLocs are found
       */
      private Hashtable getFeatureLocsHash(final DatabaseDocument doc, final List matches)
      {
        // collect the ids of the match features as strings for the bulk query
        final List idsToLoad = new Vector(matches.size());
        for(final Iterator matchIt = matches.iterator(); matchIt.hasNext(); )
        {
          final org.gmod.schema.sequence.Feature match =
            (org.gmod.schema.sequence.Feature)matchIt.next();
          idsToLoad.add( Integer.toString(match.getFeatureId()) );
        }

        final List loadedLocs = doc.getFeatureLocsByListOfIds(idsToLoad);
        if(loadedLocs == null)
          return null;

        // group the featureLocs by the id of the feature they belong to
        final Hashtable locsByFeatureId = new Hashtable();
        for(final Iterator locIt = loadedLocs.iterator(); locIt.hasNext(); )
        {
          final FeatureLoc featureLoc = (FeatureLoc)locIt.next();
          final Integer featureId =
            Integer.valueOf(featureLoc.getFeatureByFeatureId().getFeatureId());

          List locsOfFeature = (List) locsByFeatureId.get(featureId);
          if(locsOfFeature == null)
          {
            locsOfFeature = new Vector();
            locsByFeatureId.put(featureId, locsOfFeature);
          }
          locsOfFeature.add(featureLoc);
        }
        return locsByFeatureId;
      }
    
    
    tjc's avatar
    tjc committed
      /**
       *  Combine the features (which are exons) and delete the orignals from this
       *  Entry.  The key of this hash will be the group name and the value is a
       *  FeatureVector containing the feature that are in that group.  Groups
       *  that have more than one member will be combined.
       **/
    
      private void combineChadoExons(ChadoCanonicalGene gene) 
    
    tjc's avatar
    tjc committed
      {
    
        final Vector transcripts = (Vector)gene.getTranscripts();
    
    tjc's avatar
    tjc committed
        gene.correctSpliceSiteAssignments();
    
    tjc's avatar
    tjc committed
        
    
    tjc's avatar
    tjc committed
        for(int i=0; i<transcripts.size(); i++)
    
    tjc's avatar
    tjc committed
        {
    
    tjc's avatar
    tjc committed
          GFFStreamFeature transcript = (GFFStreamFeature)transcripts.get(i);
    
    tjc's avatar
    tjc committed
          String transcript_id = null;
    
    tjc's avatar
    tjc committed
          transcript_id = (String)(transcript.getQualifierByName("ID").getValues().get(0));
          
    
    tjc's avatar
    tjc committed
          Set splicedSiteTypes = gene.getSpliceTypes(transcript_id);
          if(splicedSiteTypes == null)
    
    tjc's avatar
    tjc committed
            continue;
    
    tjc's avatar
    tjc committed
          
    
    tjc's avatar
    tjc committed
          Iterator it = splicedSiteTypes.iterator();
          Vector new_set = new Vector();
          while(it.hasNext())
          {
            String type = (String)it.next();
            List splicedSites = gene.getSpliceSitesOfTranscript(transcript_id, type);
               
            if(splicedSites == null)
              continue;
    
    tjc's avatar
    tjc committed
            mergeFeatures(splicedSites, new_set, 
                          (String)(transcript.getQualifierByName("ID").getValues().get(0)));
          }
    
    tjc's avatar
    tjc committed
          for(int j=0; j<new_set.size(); j++)
    
    tjc's avatar
    tjc committed
          {
    
    tjc's avatar
    tjc committed
            if(j == 0)
              gene.addSplicedFeatures(transcript_id, 
    
    tjc's avatar
    tjc committed
                    (Feature)new_set.get(j), true );
    
    tjc's avatar
    tjc committed
            else
              gene.addSplicedFeatures(transcript_id, 
    
    tjc's avatar
    tjc committed
                    (Feature)new_set.get(j));
    
    tjc's avatar
    tjc committed
          }
    
    tjc's avatar
    tjc committed
          
        }   
      }
      
      
      private void mergeFeatures(final List gffFeatures,
                                 final List new_set, 
                                 final String transcript_id)
      {
    
        final Hashtable feature_relationship_rank_store = new Hashtable();
        final Hashtable id_range_store = new Hashtable();
        final RangeVector new_range_vector = new RangeVector();
    
    tjc's avatar
    tjc committed
        QualifierVector qualifier_vector = new QualifierVector();
        Timestamp lasttimemodified = null;
    
        for(int j = 0; j < gffFeatures.size(); j++)
        {
          final GFFStreamFeature this_feature = (GFFStreamFeature)gffFeatures.get(j);
          
          Integer rank;
          Qualifier rankQualifier = this_feature
              .getQualifierByName("feature_relationship_rank");
          if(rankQualifier == null)
            rank = new Integer(0);
          else
          {
            rank = new Integer((String) (rankQualifier.getValues().get(0)));
            this_feature.getQualifiers().removeQualifierByName("feature_relationship_rank");
          }
          
          // use the most current lastmodified datestamp
          if(this_feature.getLastModified() != null
              && (lasttimemodified == null || this_feature.getLastModified()
                  .compareTo(lasttimemodified) > 0))
            lasttimemodified = this_feature.getLastModified();
    
          final Location this_feature_location = this_feature.getLocation();
    
          if(this_feature_location.getRanges().size() > 1)
          {
            throw new Error("internal error - new location should have "
                + "exactly one range");
          }
    
          final Range new_range = (Range) this_feature_location.getRanges()
              .elementAt(0);
    
          Qualifier id_qualifier = this_feature.getQualifierByName("ID");
          if(id_qualifier != null)
          {
            String id = (String) (id_qualifier.getValues()).elementAt(0);
            id_range_store.put(id, new_range);
            feature_relationship_rank_store.put(id, rank);
          }
    
    tjc's avatar
    tjc committed
          else
            Splash.logger4j.warn("NO ID FOUND FOR FEATURE AT: "+
                         this_feature.getLocation().toString());
          
    
    tjc's avatar
    tjc committed
          if(this_feature_location.isComplement())
            new_range_vector.insertElementAt(new_range, 0);
          else
            new_range_vector.add(new_range);
    
          removeInternal(this_feature);
          qualifier_vector.addAll(this_feature.getQualifiers());
        }
    
    
        final GFFStreamFeature first_old_feature = (GFFStreamFeature)gffFeatures.get(0);
    
    tjc's avatar
    tjc committed
    
        final Location new_location = new Location(new_range_vector,
            first_old_feature.getLocation().isComplement());
    
        qualifier_vector = mergeQualifiers(qualifier_vector, first_old_feature
            .getLocation().isComplement());
    
        final GFFStreamFeature new_feature = new GFFStreamFeature(first_old_feature
            .getKey(), new_location, qualifier_vector);
    
        if(lasttimemodified != null)
          new_feature.setLastModified(lasttimemodified);
    
        
        if(first_old_feature.getChadoGene() != null)
          new_feature.setChadoGene(first_old_feature.getChadoGene());
        
    
    tjc's avatar
    tjc committed
        new_feature.setSegmentRangeStore(id_range_store);
        new_feature
            .setFeature_relationship_rank_store(feature_relationship_rank_store);
    
        new_feature.setGffSource(first_old_feature.getGffSource());
        new_feature.setGffSeqName(first_old_feature.getGffSeqName());
    
    tjc's avatar
    tjc committed
        
    //  set the ID
        String ID;
        try
        {
          ID = new_feature.getSegmentID(new_feature.getLocation().getRanges());
        }
        catch(NullPointerException npe)
        {
          if(new_feature.getQualifierByName("Parent") != null)
            ID = ((String)new_feature.getQualifierByName("Parent").getValues().get(0)) +
                ":"+new_feature.getKey().getKeyString();
          else
            ID = new_feature.getKey().getKeyString();
        }
        final Qualifier id_qualifier = new_feature.getQualifierByName("ID");
        id_qualifier.removeValue((String)(id_qualifier.getValues()).elementAt(0));
    
    tjc's avatar
    tjc committed
        id_qualifier.addValue(ID);
    
        // set visibility
        if(GeneUtils.isHiddenFeature(new_feature.getKey().getKeyString()) ||
           GeneUtils.isObsolete(new_feature))
          new_feature.setVisible(false);
        
    
    tjc's avatar
    tjc committed
        try
        {
          new_feature.setLocation(new_location);
    
          final Qualifier gene_qualifier = new_feature.getQualifierByName("gene");
    
          if(gene_qualifier != null
              && gene_qualifier.getValues().size() > 0
              && ((String) (gene_qualifier.getValues()).elementAt(0))
                  .startsWith("Phat"))
          {
            // special case to handle incorrect output of the Phat gene
            // prediction tool
            new_feature.removeQualifierByName("codon_start");
          }
    
    tjc's avatar
    tjc committed
          {
            final Qualifier old_codon_start_qualifier = first_old_feature
                .getQualifierByName("codon_start");
    
            if(old_codon_start_qualifier != null)
              new_feature.setQualifier(old_codon_start_qualifier);
    
    tjc's avatar
    tjc committed
          forcedAdd(new_feature);
          //gene.addExon(transcript_id, new_feature, true );
          new_set.add(new_feature);
        }
        catch(ReadOnlyException e)
        {
          throw new Error("internal error - unexpected exception: " + e);
        }
        catch(OutOfRangeException e)
        {
          throw new Error("internal error - unexpected exception: " + e);
        }
        catch(EntryInformationException e)
        {
          throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
        }
    
    tjc's avatar
    tjc committed
      }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      private QualifierVector mergeQualifiers(QualifierVector qualifier_vector,
                                              boolean complement)
    
    tjc's avatar
    tjc committed
      {
        QualifierVector merge_qualifier_vector = new QualifierVector();
    
    tjc's avatar
    tjc committed
        boolean seen = false;
    
    
    tjc's avatar
    tjc committed
        for(int i = 0 ; i < qualifier_vector.size() ; ++i)
        {
          Qualifier qual = (Qualifier)qualifier_vector.elementAt(i);
    
    tjc's avatar
    tjc committed
     
          if(qual.getName().equals("codon_start"))
          {
            if(!complement && !seen)
            {
              merge_qualifier_vector.addElement(qual);
              seen = true;
            }
            else if(complement)
              merge_qualifier_vector.setQualifier(qual);
          }
    
    tjc's avatar
    tjc committed
          else if(qual.getName().equals("Alias"))
    
    tjc's avatar
    tjc committed
          { 
    
    tjc's avatar
    tjc committed
            final Qualifier id_qualifier = 
              merge_qualifier_vector.getQualifierByName("Alias");
    
    tjc's avatar
    tjc committed
    
            if(id_qualifier == null)
              merge_qualifier_vector.addElement(qual);
            else
            {
    
    tjc's avatar
    tjc committed
              String id1 = (String)(id_qualifier.getValues()).elementAt(0);
              String id2 = (String)(qual.getValues()).elementAt(0);
    
    tjc's avatar
    tjc committed
              id_qualifier.removeValue(id1);
              id_qualifier.addValue(id1+","+id2);
            }
          }
    
    tjc's avatar
    tjc committed
          else if(!qual.getName().equals("ID") &&
                  !qual.getName().equals("feature_id"))
            merge_qualifier_vector.setQualifier(qual);
    
    tjc's avatar
    tjc committed
        }
        return merge_qualifier_vector;
      }
    
    tcarver's avatar
    tcarver committed
      
      /**
       * Merge function for GTF features.
       *
       * Features whose key equals keyStr are grouped by the first value of
       * their "transcript_id" qualifier (with any single quotes removed).
       * For each group the member features are removed from this entry
       * (removeInternal), their ranges and qualifiers are combined, and one
       * new feature with the key and strand of the group's first member is
       * added with forcedAdd(). Features of the requested key that have no
       * "transcript_id" value cannot be grouped and are left untouched.
       *
       * @param original_features the features to scan
       * @param keyStr the key of the features to merge
       * @throws ReadOnlyException
       */
      private void mergeGtfFeatures(FeatureVector original_features, String keyStr) throws ReadOnlyException
      {
        final Hashtable<String, Vector<GFFStreamFeature>> group =
            new Hashtable<String, Vector<GFFStreamFeature>>();

        for(int i=0; i<original_features.size(); i++)
        {
          final GFFStreamFeature feature = (GFFStreamFeature)original_features.get(i);
          if(!feature.getKey().getKeyString().equals(keyStr))
            continue;

          // getQualifierByName() returns null when the qualifier is absent;
          // skip such features instead of failing on them
          final Qualifier transcript_qualifier =
              feature.getQualifierByName("transcript_id");
          if(transcript_qualifier == null ||
             transcript_qualifier.getValues() == null ||
             transcript_qualifier.getValues().size() == 0)
            continue;

          final String transcriptId =
              ((String) transcript_qualifier.getValues().get(0)).replace("'", "");

          Vector<GFFStreamFeature> this_group = group.get(transcriptId);
          if(this_group == null)
          {
            this_group = new Vector<GFFStreamFeature>();
            group.put(transcriptId, this_group);
          }
          this_group.add(feature);
        }

        for(Vector<GFFStreamFeature> this_group: group.values())
        {
          QualifierVector qualifier_vector = new QualifierVector();
          final RangeVector new_range_vector = new RangeVector();

          for(GFFStreamFeature this_feature: this_group)
          {
            removeInternal(this_feature);
            qualifier_vector.addAll(this_feature.getQualifiers());

            final Range new_range = (Range) this_feature.getLocation().getRanges().elementAt(0);
            // NOTE(review): the complement branch stores the total range
            // while the forward branch stores the first range; the two only
            // coincide for single-range locations - confirm this is intended
            if(this_feature.getLocation().isComplement())
              new_range_vector.insertElementAt(this_feature.getLocation().getTotalRange(), 0);
            else
              new_range_vector.add(new_range);
          }
          final GFFStreamFeature old_feature = this_group.get(0);

          final Location new_location = new Location(new_range_vector,
              old_feature.getLocation().isComplement());

          qualifier_vector = mergeQualifiers(qualifier_vector, new_location.isComplement());

          // mergeQualifiers() drops "ID", so a new one is derived here.
          // NOTE(review): it is built from gene_id, not transcript_id, so
          // several transcripts of one gene get the same ID - verify
          final Qualifier gene_id_qualifier = qualifier_vector.getQualifierByName("gene_id");
          if(gene_id_qualifier != null)
            qualifier_vector.addQualifierValues(new Qualifier("ID",
                keyStr+":"+gene_id_qualifier.getValues().get(0)));

          final GFFStreamFeature new_feature = new GFFStreamFeature(old_feature
              .getKey(), new_location, qualifier_vector);
          forcedAdd(new_feature);
        }
      }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
    }