Skip to content
Snippets Groups Projects
GFFDocumentEntry.java 10.4 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* GFFDocumentEntry.java
     *
     * created: Tue Sep 14 1999
     *
     * This file is part of Artemis
     *
     * Copyright (C) 1999,2000,2001  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
    
    tjc's avatar
    tjc committed
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFDocumentEntry.java,v 1.3 2004-11-24 11:55:52 tjc Exp $
    
    tjc's avatar
    tjc committed
     */
    
    package uk.ac.sanger.artemis.io;
    
    import uk.ac.sanger.artemis.util.*;
    
    import java.io.*;
    import java.util.Hashtable;
    import java.util.Enumeration;
    
    /**
     *  A DocumentEntry that can read a GFF entry from a Document.
     *
     *  @author Kim Rutherford
     *  @version $Id: GFFDocumentEntry.java,v 1.3 2004-11-24 11:55:52 tjc Exp $
     **/
    
    public class GFFDocumentEntry extends SimpleDocumentEntry
        implements DocumentEntry {
      /**
       *  Create a new GFFDocumentEntry by reading it from the given Document.
       *  After the superclass has been constructed, features that share a gene
       *  or group name are joined into single features by combineFeatures().
       *  @param document This is the file that we will read from.  This is also
       *    used for saving the entry back to the file it came from and to give
       *    the new object a name.
       *  @param listener The object that will listen for ReadEvents.
       *  @exception IOException thrown if there is a problem reading the entry -
       *    most likely ReadFormatException.
       *  @exception EntryInformationException declared for the superclass
       *    constructor call; presumably thrown for keys or qualifiers that are
       *    not valid in a GFF entry - confirm against SimpleDocumentEntry.
       **/
      GFFDocumentEntry(final Document document, final ReadListener listener)
          throws IOException, EntryInformationException
      {
        super(new GFFEntryInformation(), document, listener);

        // merge the separate exons into one feature (if appropriate); this
        // runs while isReadOnly() still returns false
        combineFeatures();

        // construction is complete - isReadOnly() returns true from here on
        this.finished_constructor = true;
      }
    
      /**
       *  Create a new GFFDocumentEntry that will be a copy of the given Entry and
       *  has no Document associated with it.  The new GFFDocumentEntry cannot be
       *  saved to a file with save() unless save(Document) has been called
       *  first.  Some qualifier and location information will be lost.
       *  @param new_entry The Entry to copy.
       *  @param force If true then invalid qualifiers and any features with
       *    invalid keys in the new Entry will be quietly thrown away.  "Invalid"
       *    means that the key/qualifier is not allowed to occur in an Entry of
       *    this type (probably determined by the EntryInformation object of this
       *    Entry).  If false an EntryInformationException will be thrown for
       *    invalid keys or qualifiers.
       *  @exception EntryInformationException see the description of force.
       **/
      public GFFDocumentEntry(final Entry new_entry, final boolean force)
          throws EntryInformationException
      {
        super(new GFFEntryInformation(), new_entry, force);

        // construction is complete - isReadOnly() returns true from here on
        this.finished_constructor = true;
      }
    
      /**
       *  Create a new empty GFFDocumentEntry object that has no Document
       *  associated with it.  The new GFFDocumentEntry cannot be saved to a
       *  file with save() unless save(Document) has been called first.  The
       *  save(Document) method will assign a Document.
       *  @param entry_information Not used by this constructor: the new entry
       *    is always given a freshly created GFFEntryInformation.
       **/
      public GFFDocumentEntry(final EntryInformation entry_information)
      {
        super(new GFFEntryInformation());

        // construction is complete - isReadOnly() returns true from here on
        this.finished_constructor = true;
      }
    
      /**
       *  Returns true if and only if this entry is read only.  For now this
       *  always returns true - GFFDocumentEntry objects can't be changed.
       **/
    
    tjc's avatar
    tjc committed
      public boolean isReadOnly() 
      {
    
    tjc's avatar
    tjc committed
        return finished_constructor;
      }
    
      /**
       *  If the given feature can be added directly to this Entry, then return
       *  it, otherwise create and return a new feature of the appropriate type.
       *  @param copy if true then always new a new copy of the Feature.
       **/
    
    tjc's avatar
    tjc committed
      protected SimpleDocumentFeature makeNativeFeature(final Feature feature,
                                                        final boolean copy) 
      {
        if(!copy && feature instanceof GFFStreamFeature) 
          return (GFFStreamFeature)feature;
        else 
          return new GFFStreamFeature(feature);
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  If the given Sequence can be added directly to this Entry, then return a
       *  copy of it, otherwise create and return a new feature of the appropriate
       *  type for this Entry.
       **/
    
    tjc's avatar
    tjc committed
      protected StreamSequence makeNativeSequence(final Sequence sequence)
      {
        return new FastaStreamSequence(sequence);
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  Join the separate exons into one feature (if appropriate).
       **/
    
    tjc's avatar
    tjc committed
      private void combineFeatures()
      {
        final FeatureVector original_features = getAllFeatures();
    
    tjc's avatar
    tjc committed
    
        // the key of these hashes will be the group name and the value is a
        // FeatureVector containing the feature that are in that group
    
    tjc's avatar
    tjc committed
        final Hashtable forward_feature_groups = new Hashtable();
        final Hashtable reverse_feature_groups = new Hashtable();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        Feature this_feature;
        Hashtable this_strand_feature_groups;
        String group_name;
    
    
    tjc's avatar
    tjc committed
        for(int i = 0 ; i < original_features.size() ; ++i) 
        {
    
    tjc's avatar
    tjc committed
          this_feature = original_features.featureAt(i);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(this_feature.getLocation().isComplement()) 
    
    tjc's avatar
    tjc committed
            this_strand_feature_groups = reverse_feature_groups;
    
    tjc's avatar
    tjc committed
          else
    
    tjc's avatar
    tjc committed
            this_strand_feature_groups = forward_feature_groups;
    
    
    tjc's avatar
    tjc committed
          try 
          {
            if(this_feature.getQualifierByName("gene") == null)
            {
              if(this_feature.getQualifierByName("group") == null)
              {
    
    tjc's avatar
    tjc committed
                // no gene names and no groups - give up
                return;
    
    tjc's avatar
    tjc committed
              } 
              else
              {
    
    tjc's avatar
    tjc committed
                final StringVector values =
    
    tjc's avatar
    tjc committed
                  this_feature.getQualifierByName("group").getValues();
                if(values == null) 
                  throw new Error("internal error - " +
    
    tjc's avatar
    tjc committed
                                   "no value for group qualifier");
    
    tjc's avatar
    tjc committed
                else
                  group_name = values.elementAt(0);
    
    tjc's avatar
    tjc committed
              }
    
    tjc's avatar
    tjc committed
            } 
            else
            {
    
    tjc's avatar
    tjc committed
              final StringVector values =
    
    tjc's avatar
    tjc committed
                this_feature.getQualifierByName("gene").getValues();
              if(values == null) 
                throw new Error("internal error - " +
                                "no value for gene qualifier");
              
              group_name = values.elementAt(0);
    
    tjc's avatar
    tjc committed
            }
    
    tjc's avatar
    tjc committed
          }
          catch(InvalidRelationException e) 
          {
            throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
          }
    
          final FeatureVector other_features =
    
    tjc's avatar
    tjc committed
            (FeatureVector) this_strand_feature_groups.get(group_name);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(other_features == null) 
          {
            final FeatureVector new_feature_vector = new FeatureVector();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            new_feature_vector.add(this_feature);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            this_strand_feature_groups.put(group_name, new_feature_vector);
          } 
          else 
            other_features.add(this_feature);
    
    tjc's avatar
    tjc committed
        }
    
    
    tjc's avatar
    tjc committed
        combineFeaturesFromHash(forward_feature_groups);
        combineFeaturesFromHash(reverse_feature_groups);
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  Combine the features (which are exons) and delete the orignals from this
       *  Entry.  The key of this hash will be the group name and the value is a
       *  FeatureVector containing the feature that are in that group.  Groups
       *  that have more than one member will be combined.
       **/
    
    tjc's avatar
    tjc committed
      private void combineFeaturesFromHash(final Hashtable feature_groups) 
      {
        final Enumeration enumFeat = feature_groups.keys();
    
    tjc's avatar
    tjc committed
        String name;
        FeatureVector feature_group;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        while(enumFeat.hasMoreElements()) 
        {
    
    tjc's avatar
    tjc committed
          name = (String)enumFeat.nextElement();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          feature_group = (FeatureVector)feature_groups.get(name);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(feature_group.size() > 1) 
          {
    
    tjc's avatar
    tjc committed
            // combine the features (exons) and delete the orignals
    
    
    tjc's avatar
    tjc committed
            final RangeVector new_range_vector = new RangeVector();
    
    tjc's avatar
    tjc committed
    
            // storage for the original GFF lines.  the new feature will have a
            // multi-line gff_line
    
    tjc's avatar
    tjc committed
            StringVector new_gff_lines = new StringVector();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            for (int i = 0 ; i < feature_group.size() ; ++i) 
            {
    
    tjc's avatar
    tjc committed
              final GFFStreamFeature this_feature =
    
    tjc's avatar
    tjc committed
                (GFFStreamFeature)feature_group.elementAt(i);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              final Location this_feature_location = this_feature.getLocation();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(this_feature_location.getRanges().size() > 1)
               {
                throw new Error("internal error - new location should have " +
    
    tjc's avatar
    tjc committed
                                 "exactly one range");
              }
    
              final Range new_range =
    
    tjc's avatar
    tjc committed
                this_feature_location.getRanges().elementAt(0);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(this_feature_location.isComplement()) 
                new_range_vector.insertElementAt(new_range, 0);
              else 
                new_range_vector.add(new_range);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              removeInternal(this_feature);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              new_gff_lines.add(this_feature.gff_lines);
    
    tjc's avatar
    tjc committed
            }
    
    
    tjc's avatar
    tjc committed
            final Feature first_old_feature = feature_group.featureAt(0);
    
    tjc's avatar
    tjc committed
    
            final GFFStreamFeature new_feature =
    
    tjc's avatar
    tjc committed
              new GFFStreamFeature(first_old_feature);
    
    tjc's avatar
    tjc committed
    
            new_feature.gff_lines = new_gff_lines;
    
            final Location new_location =
    
    tjc's avatar
    tjc committed
              new Location(new_range_vector,
                            first_old_feature.getLocation().isComplement());
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            try 
            {
              new_feature.setLocation(new_location);
    
    tjc's avatar
    tjc committed
    
              final Qualifier gene_qualifier =
    
    tjc's avatar
    tjc committed
                new_feature.getQualifierByName("gene");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(gene_qualifier != null &&
                 gene_qualifier.getValues().size() > 0 &&
                 gene_qualifier.getValues().elementAt(0).startsWith("Phat"))
              {
    
    tjc's avatar
    tjc committed
                // special case to handle incorrect output of the Phat gene
                // prediction tool
    
    tjc's avatar
    tjc committed
                new_feature.removeQualifierByName("codon_start");
              } 
              else
              {
    
    tjc's avatar
    tjc committed
                final Qualifier old_codon_start_qualifier =
    
    tjc's avatar
    tjc committed
                  first_old_feature.getQualifierByName("codon_start");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
                if(old_codon_start_qualifier != null)
                  new_feature.setQualifier(old_codon_start_qualifier);
    
    tjc's avatar
    tjc committed
              }
    
    
    tjc's avatar
    tjc committed
              forcedAdd(new_feature);
            } 
            catch(ReadOnlyException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
            }
            catch(OutOfRangeException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
            }
            catch(EntryInformationException e) 
            {
              throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
            }
          }
        }
      }
    
      /**
       *  Set to true as the last statement of every constructor of this class
       *  and returned by isReadOnly(), so this entry reports itself as read
       *  only as soon as construction has finished.
       **/
      private boolean finished_constructor = false;
    }