Skip to content
Snippets Groups Projects
GFFDocumentEntry.java 23.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* GFFDocumentEntry.java
     *
     * created: Tue Sep 14 1999
     *
     * This file is part of Artemis
     *
    
    tjc's avatar
    tjc committed
     * Copyright (C) 1999-2005  Genome Research Limited
    
    tjc's avatar
    tjc committed
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
    
    tjc's avatar
    tjc committed
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFDocumentEntry.java,v 1.37 2007-02-09 15:15:20 tjc Exp $
    
    tjc's avatar
    tjc committed
     */
    
    package uk.ac.sanger.artemis.io;
    
    import uk.ac.sanger.artemis.util.*;
    
    
    tjc's avatar
    tjc committed
    import java.io.IOException;
    
    tjc's avatar
    tjc committed
    import java.util.Hashtable;
    import java.util.Enumeration;
    
    tjc's avatar
    tjc committed
    import java.util.Iterator;
    import java.util.List;
    import java.util.Set;
    
    import java.util.Vector;
    
    import java.sql.Timestamp;
    
    tjc's avatar
    tjc committed
    
    /**
     *  A DocumentEntry that can read an GFF entry from a Document.
     *
     *  @author Kim Rutherford
    
    tjc's avatar
    tjc committed
     *  @version $Id: GFFDocumentEntry.java,v 1.37 2007-02-09 15:15:20 tjc Exp $
    
    tjc's avatar
    tjc committed
     **/
    
    public class GFFDocumentEntry extends SimpleDocumentEntry
    
    tjc's avatar
    tjc committed
        implements DocumentEntry 
    {
    
    tjc's avatar
    tjc committed
  // set to true at the end of each constructor; note that isReadOnly() no
  // longer consults it (see the commented-out "return finished_constructor")
  private boolean finished_constructor = false;
    
    tjc's avatar
    tjc committed
      
    
    tjc's avatar
    tjc committed
      /**
       *  Create a new GFFDocumentEntry object associated with the given
       *  Document.
       *  @param document This is the file that we will read from.  This is also
       *    used for saving the entry back to the file it came from and to give
       *    the new object a name.
       *  @param listener The object that will listen for ReadEvents.
       *  @exception IOException thrown if there is a problem reading the entry -
       *    most likely ReadFormatException.
       **/
    
    tjc's avatar
    tjc committed
  GFFDocumentEntry(final Document document, final ReadListener listener)
      throws IOException, EntryInformationException 
  {
    // read the entry from the document, then assemble the gene models
    super(new GFFEntryInformation(), document, listener);

    // join the separate exons into one feature (if appropriate)
    //combineFeatures();

    // build the chado canonical gene models (gene -> transcript -> children)
    combineGeneFeatures();

    finished_constructor = true;
  }
    
      /**
       *  Create a new GFFDocumentEntry that will be a copy of the given Entry and
       *  has no Document associated with it.  The new GFFDocumentEntry cannot be
    
    tjc's avatar
    tjc committed
       *  saved to a file with save() unless save(Document) has been called
    
    tjc's avatar
    tjc committed
       *  first.  Some qualifier and location information will be lost.
       *  @param force If true then invalid qualifiers and any features with
       *    invalid keys in the new Entry will be quietly thrown away.  "Invalid"
       *    means that the key/qualifier is not allowed to occur in an Entry of
       *    this type (probably determined by the EntryInformation object of this
       *    Entry).  If false an EntryInformationException will be thrown for
       *    invalid keys or qualifiers.
       **/
    
    tjc's avatar
    tjc committed
  public GFFDocumentEntry(final Entry new_entry, final boolean force)
      throws EntryInformationException 
  {
    // copy the features of new_entry into a fresh, document-less GFF entry;
    // force controls whether invalid keys/qualifiers are dropped or fatal
    super(new GFFEntryInformation(), new_entry, force);

    finished_constructor = true;
  }
    
      /**
       *  Create a new empty GFFDocumentEntry object that has no Document
       *  associated with it.  The new GFFDocumentEntry cannot be saved to a
    
    tjc's avatar
    tjc committed
       *  file with save() unless save(Document) has been called first.  The
       *  save(Document) method will assign a Document.
    
    tjc's avatar
    tjc committed
       **/
    
    tjc's avatar
    tjc committed
  public GFFDocumentEntry(final EntryInformation entry_information) 
  {
    // NOTE(review): the entry_information argument is ignored - a new
    // GFFEntryInformation is always used instead.  Confirm this is intended.
    super(new GFFEntryInformation());

    finished_constructor = true;
  }
    
  /**
   *  Returns true if and only if this entry is read only.  Currently this
   *  always returns false - GFFDocumentEntry objects can be changed.  (The
   *  commented-out line below shows it once returned finished_constructor,
   *  which made the entry read-only after construction.)
   **/
  public boolean isReadOnly() 
  {
//  return finished_constructor;
    return false;
  }
    
      /**
       *  If the given feature can be added directly to this Entry, then return
       *  it, otherwise create and return a new feature of the appropriate type.
       *  @param copy if true then always new a new copy of the Feature.
       **/
    
    tjc's avatar
    tjc committed
      protected SimpleDocumentFeature makeNativeFeature(final Feature feature,
                                                        final boolean copy) 
      {
        if(!copy && feature instanceof GFFStreamFeature) 
          return (GFFStreamFeature)feature;
        else 
          return new GFFStreamFeature(feature);
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  If the given Sequence can be added directly to this Entry, then return a
       *  copy of it, otherwise create and return a new feature of the appropriate
       *  type for this Entry.
       **/
    
    tjc's avatar
    tjc committed
      protected StreamSequence makeNativeSequence(final Sequence sequence)
      {
        return new FastaStreamSequence(sequence);
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      private void combineGeneFeatures()
      {
        final FeatureVector original_features = getAllFeatures();
        
        Feature this_feature;
        Hashtable chado_gene = new Hashtable();
        try
        {
          // find the genes
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
            String key = this_feature.getKey().getKeyString();
            
            if(key.equals("gene"))
            {
              String id = (String)this_feature.getQualifierByName("ID").getValues().get(0);
              ChadoCanonicalGene gene = new ChadoCanonicalGene();
              gene.setGene(this_feature);
              chado_gene.put(id, gene);
              ((GFFStreamFeature)this_feature).setChadoGene(gene);
            }
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          // find the transcripts
    
    tjc's avatar
    tjc committed
          Hashtable transcripts_lookup = new Hashtable();
    
    tjc's avatar
    tjc committed
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
            
            // transcript 
            Qualifier parent_qualifier = this_feature.getQualifierByName("Parent");
            
            if(parent_qualifier == null)
              continue;
    
            StringVector parents = parent_qualifier.getValues();
            for(int j=0; j<parents.size(); j++)
            {
              String parent = (String)parents.get(j);
              
              if(chado_gene.containsKey(parent))
              {
                // store transcript
                ChadoCanonicalGene gene = (ChadoCanonicalGene)chado_gene.get(parent);
                gene.addTranscript(this_feature);
    
    tjc's avatar
    tjc committed
                
                // store the transcript ID with its ChadoCanonicalGene object
                transcripts_lookup.put((String)this_feature.getQualifierByName("ID").getValues().get(0),
                                       gene);
    
    tjc's avatar
    tjc committed
                continue;
              }
            }
          }
          
    
    tjc's avatar
    tjc committed
          
    
    tjc's avatar
    tjc committed
          // find exons & protein
    
    tjc's avatar
    tjc committed
          String key;
    
    tjc's avatar
    tjc committed
          for(int i = 0 ; i < original_features.size() ; ++i) 
          {
            this_feature = original_features.featureAt(i);
            // exons
    
    tjc's avatar
    tjc committed
            key = this_feature.getKey().getKeyString();
    
    tjc's avatar
    tjc committed
            //if(!key.equals("exon") && !key.equals("polypeptide") &&
            //   !key.endsWith("prime_UTR"))
            //  continue;
    
    tjc's avatar
    tjc committed
            
    
    tjc's avatar
    tjc committed
            Qualifier parent_qualifier  = this_feature.getQualifierByName("Parent");
    
    tjc's avatar
    tjc committed
            Qualifier derives_qualifier = this_feature.getQualifierByName("Derives_from");
            if(parent_qualifier == null && derives_qualifier == null)
    
    tjc's avatar
    tjc committed
              continue;    
              
    
    tjc's avatar
    tjc committed
            Qualifier featureRelationship = 
              this_feature.getQualifierByName("feature_relationship_rank");
    
    tjc's avatar
    tjc committed
            // compare this features parent_id's to transcript id's in the 
            // chado gene hash to decide if it is part of it
            final StringVector parent_id;
            
            if(parent_qualifier != null)
              parent_id = parent_qualifier.getValues();
            else
              parent_id = derives_qualifier.getValues();
            
    
    tjc's avatar
    tjc committed
            for(int j=0; j<parent_id.size(); j++)
            {
              String parent = (String)parent_id.get(j);
    
    tjc's avatar
    tjc committed
             
    
    tjc's avatar
    tjc committed
              if(transcripts_lookup.containsKey(parent))
              {
                ChadoCanonicalGene gene = (ChadoCanonicalGene)transcripts_lookup.get(parent);
    
    tjc's avatar
    tjc committed
                 
    
    tjc's avatar
    tjc committed
                if(parent_qualifier == null)
                  gene.addProtein(parent, this_feature);
    
    tjc's avatar
    tjc committed
                else if(key.equals("three_prime_UTR"))
                  gene.add3PrimeUtr(parent, this_feature);
                else if(key.equals("five_prime_UTR"))
                  gene.add5PrimeUtr(parent, this_feature);
    
    tjc's avatar
    tjc committed
                else if(key.equals("exon") || featureRelationship != null)
                  gene.addSplicedFeatures(parent, this_feature);
    
    tjc's avatar
    tjc committed
                else
                  gene.addOtherFeatures(parent, this_feature);
    
    tjc's avatar
    tjc committed
            Enumeration enum_genes = chado_gene.elements();
            while(enum_genes.hasMoreElements())
            {
              ChadoCanonicalGene gene = (ChadoCanonicalGene)enum_genes.nextElement();
    
    tjc's avatar
    tjc committed
              Feature transcript = (Feature)gene.containsTranscript(parent_id);
    
    tjc's avatar
    tjc committed
              
              if(transcript != null)
              {
                if(parent_qualifier == null)
    
    tjc's avatar
    tjc committed
                  gene.addProtein((String)transcript.getQualifierByName("ID").getValues().get(0), 
                                  this_feature);
    
    tjc's avatar
    tjc committed
                else
    
    tjc's avatar
    tjc committed
                  gene.addExon((String)transcript.getQualifierByName("ID").getValues().get(0),
                               this_feature);
    
    tjc's avatar
    tjc committed
              }
    
    tjc's avatar
    tjc committed
              
    
    tjc's avatar
    tjc committed
            }
    
    tjc's avatar
    tjc committed
            */
    
    tjc's avatar
    tjc committed
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          // now join exons
    
    tjc's avatar
    tjc committed
          //combineFeatures();
    
    tjc's avatar
    tjc committed
          Enumeration enum_genes = chado_gene.elements();
          while(enum_genes.hasMoreElements())
          {
            ChadoCanonicalGene gene = (ChadoCanonicalGene)enum_genes.nextElement();
            combineChadoExons(gene);
          }
    
    tjc's avatar
    tjc committed
        }
        catch(InvalidRelationException e)
        {
          e.printStackTrace();
        }
      }
      
    
    tjc's avatar
    tjc committed
      /**
       *  Join the separate exons into one feature (if appropriate).
       **/
    
    tjc's avatar
    tjc committed
      /*private void combineFeatures()
    
    tjc's avatar
    tjc committed
      {
        final FeatureVector original_features = getAllFeatures();
    
    tjc's avatar
    tjc committed
    
        // the key of these hashes will be the group name and the value is a
        // FeatureVector containing the feature that are in that group
    
    tjc's avatar
    tjc committed
        final Hashtable forward_feature_groups = new Hashtable();
        final Hashtable reverse_feature_groups = new Hashtable();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        Feature this_feature;
        Hashtable this_strand_feature_groups;
    
    tjc's avatar
    tjc committed
        String group_name = null;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        for(int i = 0 ; i < original_features.size() ; ++i) 
        {
    
    tjc's avatar
    tjc committed
          this_feature = original_features.featureAt(i);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(this_feature.getLocation().isComplement()) 
    
    tjc's avatar
    tjc committed
            this_strand_feature_groups = reverse_feature_groups;
    
    tjc's avatar
    tjc committed
          else
    
    tjc's avatar
    tjc committed
            this_strand_feature_groups = forward_feature_groups;
    
    
    tjc's avatar
    tjc committed
          try 
          {
    
    tjc's avatar
    tjc committed
            String key = this_feature.getKey().getKeyString();
    
    tjc's avatar
    tjc committed
            if(key.equals("CDS") || key.equals("polypeptide_domain") || 
               key.equals("polypeptide") || key.equals("exon"))
    
    tjc's avatar
    tjc committed
            {
    
    tjc's avatar
    tjc committed
              if(this_feature.getQualifierByName("Parent") != null)
    
    tjc's avatar
    tjc committed
              {
    
    tjc's avatar
    tjc committed
                StringVector values =
    
    tjc's avatar
    tjc committed
                  this_feature.getQualifierByName("Parent").getValues();
    
    tjc's avatar
    tjc committed
                group_name = (String)values.elementAt(0);
    
                if(this_feature.getQualifierByName("ID") != null &&
                   !key.equals("exon"))
    
    tjc's avatar
    tjc committed
                {
                  values =
                      this_feature.getQualifierByName("ID").getValues();
                  group_name = group_name+values.elementAt(0);
                }
    
    tjc's avatar
    tjc committed
              }
    
    tjc's avatar
    tjc committed
              else
                continue; 
    
    tjc's avatar
    tjc committed
            }
            else
              continue;
    
    tjc's avatar
    tjc committed
    
            final FeatureVector other_features =
              (FeatureVector) this_strand_feature_groups.get(group_name);
    
            if(other_features == null)
    
    tjc's avatar
    tjc committed
            {
    
    tjc's avatar
    tjc committed
              final FeatureVector new_feature_vector = new FeatureVector();
              new_feature_vector.add(this_feature);
              this_strand_feature_groups.put(group_name, new_feature_vector);
            }
    
    tjc's avatar
    tjc committed
            else
    
    tjc's avatar
    tjc committed
              other_features.add(this_feature);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          }
          catch(InvalidRelationException e) 
          {
            throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
          }
    
        }
    
    
    tjc's avatar
    tjc committed
        combineFeaturesFromHash(forward_feature_groups);
        combineFeaturesFromHash(reverse_feature_groups);
    
    tjc's avatar
    tjc committed
      }*/
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      /**
       *  Combine the features (which are exons) and delete the orignals from this
       *  Entry.  The key of this hash will be the group name and the value is a
       *  FeatureVector containing the feature that are in that group.  Groups
       *  that have more than one member will be combined.
       **/
    
    tjc's avatar
    tjc committed
      public void combineChadoExons(ChadoCanonicalGene gene) 
    
    tjc's avatar
    tjc committed
      {
    
    tjc's avatar
    tjc committed
        Vector transcripts = (Vector)gene.getTranscripts();
    
    tjc's avatar
    tjc committed
        gene.correctSpliceSiteAssignments();
    
    tjc's avatar
    tjc committed
        
    
    tjc's avatar
    tjc committed
        for(int i=0; i<transcripts.size(); i++)
    
    tjc's avatar
    tjc committed
        {
    
    tjc's avatar
    tjc committed
          GFFStreamFeature transcript = (GFFStreamFeature)transcripts.get(i);
    
    tjc's avatar
    tjc committed
          String transcript_id = null;
    
    tjc's avatar
    tjc committed
          transcript_id = (String)(transcript.getQualifierByName("ID").getValues().get(0));
          
    
    tjc's avatar
    tjc committed
          Set splicedSiteTypes = gene.getSpliceTypes(transcript_id);
          if(splicedSiteTypes == null)
    
    tjc's avatar
    tjc committed
            continue;
    
    tjc's avatar
    tjc committed
          
    
    tjc's avatar
    tjc committed
          Iterator it = splicedSiteTypes.iterator();
          Vector new_set = new Vector();
          while(it.hasNext())
          {
            String type = (String)it.next();
            List splicedSites = gene.getSpliceSitesOfTranscript(transcript_id, type);
               
            if(splicedSites == null)
              continue;
    
    tjc's avatar
    tjc committed
            mergeFeatures(splicedSites, new_set, 
                          (String)(transcript.getQualifierByName("ID").getValues().get(0)));
          }
    
    tjc's avatar
    tjc committed
          for(int j=0; j<new_set.size(); j++)
    
    tjc's avatar
    tjc committed
          {
    
    tjc's avatar
    tjc committed
            try
    
    tjc's avatar
    tjc committed
            {
    
    tjc's avatar
    tjc committed
              if(j == 0)
                gene.addSplicedFeatures(transcript_id, 
                    (Feature)new_set.get(j), true );
              else
                gene.addSplicedFeatures(transcript_id, 
                    (Feature)new_set.get(j));
            }
            catch(InvalidRelationException e)
    
    tjc's avatar
    tjc committed
            {
    
    tjc's avatar
    tjc committed
              // TODO Auto-generated catch block
              e.printStackTrace();
    
    tjc's avatar
    tjc committed
            }
          }
    
    tjc's avatar
    tjc committed
        // now merge the exons in the ChadoCanonicalGene feature
    
    tjc's avatar
    tjc committed
        /*Enumeration enum_exon_set = new_exon_set.keys();
    
    tjc's avatar
    tjc committed
        int num = 0;
        while(enum_exon_set.hasMoreElements())
        {
          String transcript_id = (String)enum_exon_set.nextElement();
          try
          {
            if(num == 0)
    
    tjc's avatar
    tjc committed
              gene.addSplicedFeatures(transcript_id, 
    
    tjc's avatar
    tjc committed
                           (Feature)new_exon_set.get(transcript_id), true );
    
    tjc's avatar
    tjc committed
            else
    
    tjc's avatar
    tjc committed
              gene.addSplicedFeatures(transcript_id, 
    
    tjc's avatar
    tjc committed
                           (Feature)new_exon_set.get(transcript_id));
    
    tjc's avatar
    tjc committed
            num++;
          }
          catch(InvalidRelationException e)
          {
            // TODO Auto-generated catch block
            e.printStackTrace();
          }
    
    tjc's avatar
    tjc committed
        }*/
      }
      
      
  /**
   *  Merge a list of GFF features (e.g. the exons of one transcript) into a
   *  single new GFFStreamFeature whose Location holds one range per original
   *  feature.  The originals are removed from this entry (removeInternal())
   *  and the merged feature is added with forcedAdd() and appended to
   *  new_set.  Qualifiers of the originals are combined via
   *  mergeQualifiers() and the merged feature gets a fresh segment-derived
   *  ID qualifier.
   *  @param gffFeatures the features to merge; each must have exactly one
   *    range in its location, otherwise an Error is thrown
   *  @param new_set the list that the newly created merged feature is
   *    appended to
   *  @param transcript_id the ID of the owning transcript - currently unused
   *    except by the commented-out addExon() call below
   **/
  private void mergeFeatures(final List gffFeatures,
                             final List new_set, 
                             final String transcript_id)
  {
    // per-segment bookkeeping, keyed by each original feature's ID qualifier
    Hashtable feature_relationship_rank_store = new Hashtable();
    Hashtable id_range_store = new Hashtable();
    RangeVector new_range_vector = new RangeVector();
    QualifierVector qualifier_vector = new QualifierVector();
    Timestamp lasttimemodified = null;

    for(int j = 0; j < gffFeatures.size(); j++)
    {
      final GFFStreamFeature this_feature = (GFFStreamFeature)gffFeatures.get(j);
      
      // rank defaults to 0 when no feature_relationship_rank qualifier is
      // present; otherwise the qualifier is consumed (removed) here
      Integer rank;
      Qualifier rankQualifier = this_feature
          .getQualifierByName("feature_relationship_rank");
      if(rankQualifier == null)
        rank = new Integer(0);
      else
      {
        rank = new Integer((String) (rankQualifier.getValues().get(0)));
        this_feature.getQualifiers().removeQualifierByName("feature_relationship_rank");
      }
      
      // use the most current lastmodified datestamp
      if(this_feature.getLastModified() != null
          && (lasttimemodified == null || this_feature.getLastModified()
              .compareTo(lasttimemodified) > 0))
        lasttimemodified = this_feature.getLastModified();

      final Location this_feature_location = this_feature.getLocation();

      if(this_feature_location.getRanges().size() > 1)
      {
        throw new Error("internal error - new location should have "
            + "exactly one range");
      }

      final Range new_range = (Range) this_feature_location.getRanges()
          .elementAt(0);

      Qualifier id_qualifier = this_feature.getQualifierByName("ID");
      if(id_qualifier != null)
      {
        String id = (String) (id_qualifier.getValues()).elementAt(0);
        id_range_store.put(id, new_range);
        feature_relationship_rank_store.put(id, rank);
      }

      // keep the ranges ordered 5' to 3': prepend on the complement strand
      if(this_feature_location.isComplement())
        new_range_vector.insertElementAt(new_range, 0);
      else
        new_range_vector.add(new_range);

      // remove the original feature from the entry and collect its qualifiers
      removeInternal(this_feature);
      qualifier_vector.addAll(this_feature.getQualifiers());
    }

    // the first original feature supplies the key, strand and codon_start
    final Feature first_old_feature = (Feature)gffFeatures.get(0);

    final Location new_location = new Location(new_range_vector,
        first_old_feature.getLocation().isComplement());

    qualifier_vector = mergeQualifiers(qualifier_vector, first_old_feature
        .getLocation().isComplement());

    final GFFStreamFeature new_feature = new GFFStreamFeature(first_old_feature
        .getKey(), new_location, qualifier_vector);

    if(lasttimemodified != null)
      new_feature.setLastModified(lasttimemodified);
    new_feature.setSegmentRangeStore(id_range_store);
    new_feature
        .setFeature_relationship_rank_store(feature_relationship_rank_store);

    // set the ID: replace the merged feature's first ID value with one
    // derived from its segment ranges
    String ID = new_feature.getSegmentID(new_feature.getLocation().getRanges());

    Qualifier id_qualifier = new_feature.getQualifierByName("ID");
    id_qualifier.removeValue((String) (id_qualifier.getValues()).elementAt(0));
    id_qualifier.addValue(ID);

    try
    {
      new_feature.setLocation(new_location);

      final Qualifier gene_qualifier = new_feature.getQualifierByName("gene");

      if(gene_qualifier != null
          && gene_qualifier.getValues().size() > 0
          && ((String) (gene_qualifier.getValues()).elementAt(0))
              .startsWith("Phat"))
      {
        // special case to handle incorrect output of the Phat gene
        // prediction tool
        new_feature.removeQualifierByName("codon_start");
      }
      else
      {
        final Qualifier old_codon_start_qualifier = first_old_feature
            .getQualifierByName("codon_start");

        if(old_codon_start_qualifier != null)
          new_feature.setQualifier(old_codon_start_qualifier);
      }
      forcedAdd(new_feature);
      //gene.addExon(transcript_id, new_feature, true );
      new_set.add(new_feature);

    }
    catch(ReadOnlyException e)
    {
      throw new Error("internal error - unexpected exception: " + e);
    }
    catch(OutOfRangeException e)
    {
      throw new Error("internal error - unexpected exception: " + e);
    }
    catch(EntryInformationException e)
    {
      throw new Error("internal error - unexpected exception: " + e);
    }
  }
      
    
    tjc's avatar
    tjc committed
  /**
   *  Combine the features (which are exons) and delete the originals from
   *  this Entry.  The key of this hash will be the group name and the value
   *  is a FeatureVector containing the features that are in that group.
   *  Groups that have more than one member will be combined.
   **/
    
    tjc's avatar
    tjc committed
      /*private void combineFeaturesFromHash(final Hashtable feature_groups) 
    
    tjc's avatar
    tjc committed
      {
        final Enumeration enumFeat = feature_groups.keys();
    
    tjc's avatar
    tjc committed
        String name;
        FeatureVector feature_group;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        while(enumFeat.hasMoreElements()) 
        {
    
    tjc's avatar
    tjc committed
          name = (String)enumFeat.nextElement();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          feature_group = (FeatureVector)feature_groups.get(name);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(feature_group.size() > 1) 
          {
    
    tjc's avatar
    tjc committed
            // combine the features (exons) and delete the orignals
    
    
    tjc's avatar
    tjc committed
            final RangeVector new_range_vector = new RangeVector();
    
    tjc's avatar
    tjc committed
            QualifierVector qualifier_vector = new QualifierVector();
            Hashtable id_range_store = new Hashtable();
    
            Timestamp lasttimemodified = null;
            
    
    tjc's avatar
    tjc committed
            for (int i = 0 ; i < feature_group.size() ; ++i) 
            {
    
    tjc's avatar
    tjc committed
              final GFFStreamFeature this_feature =
    
    tjc's avatar
    tjc committed
                (GFFStreamFeature)feature_group.elementAt(i);
    
              lasttimemodified = this_feature.getLastModified();
              
    
    tjc's avatar
    tjc committed
              final Location this_feature_location = this_feature.getLocation();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(this_feature_location.getRanges().size() > 1)
    
    tjc's avatar
    tjc committed
              {
    
    tjc's avatar
    tjc committed
                throw new Error("internal error - new location should have " +
    
    tjc's avatar
    tjc committed
                                 "exactly one range");
              }
    
              final Range new_range =
    
    tjc's avatar
    tjc committed
                (Range)this_feature_location.getRanges().elementAt(0);
    
              Qualifier id_qualifier = this_feature.getQualifierByName("ID");
              if(id_qualifier != null)
              {
    
    tjc's avatar
    tjc committed
                String id = (String)(id_qualifier.getValues()).elementAt(0);
    
    tjc's avatar
    tjc committed
                id_range_store.put(new_range, id);
              }
    
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(this_feature_location.isComplement()) 
                new_range_vector.insertElementAt(new_range, 0);
              else 
                new_range_vector.add(new_range);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              removeInternal(this_feature);
    
    tjc's avatar
    tjc committed
              qualifier_vector.addAll(this_feature.getQualifiers());
    
    tjc's avatar
    tjc committed
            }
    
    
    tjc's avatar
    tjc committed
            final Feature first_old_feature = feature_group.featureAt(0);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            final Location new_location = new Location(new_range_vector,
                        first_old_feature.getLocation().isComplement());
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            qualifier_vector = mergeQualifiers(qualifier_vector,
                                               first_old_feature.getLocation().isComplement());
    
    
    tjc's avatar
    tjc committed
            final GFFStreamFeature new_feature = new GFFStreamFeature(first_old_feature.getKey(),
                                                                 new_location, qualifier_vector);
    
    tjc's avatar
    tjc committed
            
            if(lasttimemodified != null)
              new_feature.setLastModified(lasttimemodified);
    
    tjc's avatar
    tjc committed
            new_feature.setSegmentRangeStore(id_range_store);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            try 
            {
              new_feature.setLocation(new_location);
    
    tjc's avatar
    tjc committed
    
              final Qualifier gene_qualifier =
    
    tjc's avatar
    tjc committed
                new_feature.getQualifierByName("gene");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(gene_qualifier != null &&
                 gene_qualifier.getValues().size() > 0 &&
    
    tjc's avatar
    tjc committed
                 ((String)(gene_qualifier.getValues()).elementAt(0)).startsWith("Phat"))
    
    tjc's avatar
    tjc committed
              {
    
    tjc's avatar
    tjc committed
                // special case to handle incorrect output of the Phat gene
                // prediction tool
    
    tjc's avatar
    tjc committed
                new_feature.removeQualifierByName("codon_start");
              } 
              else
              {
    
    tjc's avatar
    tjc committed
                final Qualifier old_codon_start_qualifier =
    
    tjc's avatar
    tjc committed
                  first_old_feature.getQualifierByName("codon_start");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
                if(old_codon_start_qualifier != null)
                  new_feature.setQualifier(old_codon_start_qualifier);
    
    tjc's avatar
    tjc committed
              }
    
    
    tjc's avatar
    tjc committed
              forcedAdd(new_feature);
            } 
            catch(ReadOnlyException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
            }
            catch(OutOfRangeException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
            }
            catch(EntryInformationException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
            }
          }
        }
    
    tjc's avatar
    tjc committed
      }*/
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      private QualifierVector mergeQualifiers(QualifierVector qualifier_vector,
                                              boolean complement)
    
    tjc's avatar
    tjc committed
      {
        QualifierVector merge_qualifier_vector = new QualifierVector();
    
    tjc's avatar
    tjc committed
        boolean seen = false;
    
    
    tjc's avatar
    tjc committed
        for(int i = 0 ; i < qualifier_vector.size() ; ++i)
        {
          Qualifier qual = (Qualifier)qualifier_vector.elementAt(i);
    
    tjc's avatar
    tjc committed
     
          if(qual.getName().equals("codon_start"))
          {
            if(!complement && !seen)
            {
              merge_qualifier_vector.addElement(qual);
              seen = true;
            }
            else if(complement)
              merge_qualifier_vector.setQualifier(qual);
          }
    
    tjc's avatar
    tjc committed
          else if(qual.getName().equals("Alias"))
    
    tjc's avatar
    tjc committed
          { 
    
    tjc's avatar
    tjc committed
            final Qualifier id_qualifier = 
              merge_qualifier_vector.getQualifierByName("Alias");
    
    tjc's avatar
    tjc committed
    
            if(id_qualifier == null)
              merge_qualifier_vector.addElement(qual);
            else
            {
    
    tjc's avatar
    tjc committed
              String id1 = (String)(id_qualifier.getValues()).elementAt(0);
              String id2 = (String)(qual.getValues()).elementAt(0);
    
    tjc's avatar
    tjc committed
              id_qualifier.removeValue(id1);
              id_qualifier.addValue(id1+","+id2);
            }
          }
    
    tjc's avatar
    tjc committed
          else if(!qual.getName().equals("ID") &&
                  !qual.getName().equals("feature_id"))
            merge_qualifier_vector.setQualifier(qual);
    
    tjc's avatar
    tjc committed
        }
        return merge_qualifier_vector;
      }
    
    
    tjc's avatar
    tjc committed
    }