Skip to content
Snippets Groups Projects
GFFStreamFeature.java 36.2 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* GFFStreamFeature.java
     *
     * created: Tue Sep 14 1999
     *
     * This file is part of Artemis
     *
     * Copyright (C) 1999,2000,2001  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
    
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFStreamFeature.java,v 1.64 2008-09-30 13:21:52 tjc Exp $
    
    tjc's avatar
    tjc committed
     */
    
    package uk.ac.sanger.artemis.io;
    
    
    import java.util.Hashtable;
    
    tjc's avatar
    tjc committed
    import java.util.Enumeration;
    
    tjc's avatar
    tjc committed
    import java.util.List;
    
    import java.util.StringTokenizer;
    
    tjc's avatar
    tjc committed
    import java.util.Vector;
    
    tjc's avatar
    tjc committed
    import java.io.IOException;
    import java.io.Writer;
    
    import java.sql.Timestamp;
    import java.text.SimpleDateFormat;
    
    tjc's avatar
    tjc committed
    
    
    import uk.ac.sanger.artemis.Options;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.chado.ClusterLazyQualifierValue;
    import uk.ac.sanger.artemis.components.genebuilder.ortholog.MatchPanel;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.util.LinePushBackReader;
    import uk.ac.sanger.artemis.util.OutOfRangeException;
    import uk.ac.sanger.artemis.util.ReadOnlyException;
    import uk.ac.sanger.artemis.util.StringVector;
    
    
    tjc's avatar
    tjc committed
    
    /**
     *  A StreamFeature that thinks it is a GFF feature.
     *
     *  @author Kim Rutherford
    
     *  @version $Id: GFFStreamFeature.java,v 1.64 2008-09-30 13:21:52 tjc Exp $
    
    tjc's avatar
    tjc committed
     **/
    
    
    public class GFFStreamFeature extends SimpleDocumentFeature
                           implements DocumentFeature, StreamFeature, ComparableFeature 
    {
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      private static org.apache.log4j.Logger logger4j = 
    
    tjc's avatar
    tjc committed
        org.apache.log4j.Logger.getLogger(GFFStreamFeature.class);
      
    
    tjc's avatar
    tjc committed
      /**
       *  This is the line of GFF input that was read to get this
       *  GFFStreamFeature.  A GFFStreamFeature that was created from multiple GFF
       *  lines will have a gff_lines variable that contains multiple line.
       **/
      StringVector gff_lines = null;
    
    
    tjc's avatar
    tjc committed
      /** store for spliced features containing id and range of each segment */
      private Hashtable id_range_store;
    
      
      /** store a record of the new and old uniquenames that have been changed */
      private Hashtable newIdMapToOldId;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      /** store the Timestamp for the feature */
    
      private Timestamp timelastmodified;
      
    
    tjc's avatar
    tjc committed
      private ChadoCanonicalGene chadoGene;
      
    
    tjc's avatar
    tjc committed
      private boolean visible = true;
    
      /** combined feature_relationship.rank store for exons */
    
    tjc's avatar
    tjc committed
      private Hashtable feature_relationship_rank_store;
    
    tjc's avatar
    tjc committed
      
    
      /** first tabbed parameter  */
      private String gffSeqName;
      /** second tabbed parameter */
      private String gffSource;
    
    tjc's avatar
    tjc committed
      /** duplication count */
      private short duplicate = 0;
    
    tjc's avatar
    tjc committed
      private boolean lazyLoaded = false;
      private org.gmod.schema.sequence.Feature chadoLazyFeature;
    
    tjc's avatar
    tjc committed
      /**
       *  Create a new GFFStreamFeature object.  The feature should be added
       *  to an Entry (with Entry.add()).
       *  <p>
       *  Default qualifiers are added when absent: score ("."), gff_source
       *  ("artemis"), gff_seqname (".") and ID.  The ID is taken from the first
       *  systematic-name qualifier (see Options.getSystematicQualifierNames())
       *  that has a value; otherwise it is generated from the key and location.
       *  @param key The new feature key
       *  @param location The Location object for the new feature
       *  @param qualifiers The qualifiers for the new feature
       **/
      public GFFStreamFeature(final Key key, final Location location,
                              final QualifierVector qualifiers) 
      {
        super(null);

        try 
        {
          setKey(key);
          setLocation(location);
          setQualifiers(qualifiers);

          if(getQualifierByName("score") == null)
            setQualifier(new Qualifier("score", "."));

          if(getQualifierByName("gff_source") == null)
            setQualifier(new Qualifier("gff_source", "artemis"));

          if(getQualifierByName("gff_seqname") == null)
            setQualifier(new Qualifier("gff_seqname", "."));

          if(getQualifierByName("ID") == null)
          {
            String idStr = null;
            final StringVector v = Options.getOptions().getSystematicQualifierNames();
            for(int i=0; i<v.size(); i++)
            {
              final Qualifier sysQualifier = getQualifierByName((String)v.get(i));
              if(sysQualifier == null)
                continue;

              // a qualifier without values cannot supply an ID; try the next name
              final StringVector sysValues = sysQualifier.getValues();
              if(sysValues != null && sysValues.size() > 0)
              {
                idStr = (String)sysValues.get(0);
                break;
              }
            }

            // autogenerate ID
            if(idStr == null)
              idStr = key.getKeyString()+":"+location.toString();
            setQualifier(new Qualifier("ID", idStr));
          }
        } 
        catch(EntryInformationException e) 
        {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        }
        catch(ReadOnlyException e) 
        {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        } 
        catch(OutOfRangeException e) 
        {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        }
      }
    
    
    tjc's avatar
    tjc committed
      /**
       *  Create a new GFFStreamFeature from the given feature.  Delegates to
       *  the (Feature, boolean) constructor with isDuplicatedInChado set to
       *  false.
       *  @param feature The feature to copy.
       **/
      public GFFStreamFeature(final Feature feature)
      {
        this(feature, false);
      }
      
    
    tjc's avatar
    tjc committed
      /**
       *  Create a new GFFStreamFeature with the same key, location and
       *  qualifiers as the given feature.  The feature should be added to an
       *  Entry (with Entry.add()).
       *  <p>
       *  When the given feature is itself a GFFStreamFeature, its segment store,
       *  feature_relationship rank store, GFF sequence name and GFF source are
       *  copied as well (the two stores are shallow clones).
       *  @param feature The feature to copy.
       *  @param isDuplicatedInChado if true and the given feature is a
       *    GFFStreamFeature, the copy is made a distinct feature: the source
       *    feature's duplicate count is incremented, the ID, segment IDs, Parent
       *    and Derives_from values are prefixed with "DUP&lt;count&gt;-", and the
       *    feature_id, timelastmodified and feature_relationship_rank
       *    qualifiers are removed.
       **/
      public GFFStreamFeature(final Feature feature, final boolean isDuplicatedInChado) 
      {
        this(feature.getKey(), feature.getLocation(), feature.getQualifiers());

        if(!(feature instanceof GFFStreamFeature))
          return;

        final GFFStreamFeature gffFeature = (GFFStreamFeature)feature;

        if(gffFeature.id_range_store != null)
          this.id_range_store = (Hashtable)gffFeature.id_range_store.clone();

        if(gffFeature.feature_relationship_rank_store != null)
          this.feature_relationship_rank_store = 
            (Hashtable)gffFeature.feature_relationship_rank_store.clone();

        this.setGffSeqName(gffFeature.getGffSeqName());
        this.setGffSource(gffFeature.getGffSource());

        if(!isDuplicatedInChado)
          return;

        try
        {
          // the counter lives on the source feature so that successive
          // duplicates of the same feature get different prefixes
          gffFeature.duplicate++;
          final String duplicatePrefix =
            "DUP"+Short.toString(gffFeature.duplicate)+"-";

          final String uniquename;
          if(id_range_store != null)
          {
            // rebuild the segment store with every segment ID prefixed
            final Hashtable new_id_range_store = new Hashtable(id_range_store.size());
            final Enumeration enumIdRangeStore = id_range_store.keys();
            while(enumIdRangeStore.hasMoreElements())
            {
              final String keyId = (String)enumIdRangeStore.nextElement();
              final Range range  = (Range)id_range_store.get(keyId);
              new_id_range_store.put(duplicatePrefix+keyId, range);
            }
            this.id_range_store = new_id_range_store;

            uniquename = getSegmentID(getLocation().getRanges());
          }
          else
            uniquename = duplicatePrefix+ (String)getQualifierByName("ID").getValues().get(0);

          setQualifier(new Qualifier("ID", uniquename));

          if(getQualifierByName("Parent") != null)
          {
            final String parent =
              (String) getQualifierByName("Parent").getValues().get(0);
            setQualifier(new Qualifier("Parent", duplicatePrefix+parent));
          }

          if(getQualifierByName("Derives_from") != null)
          {
            final String derives_from =
              (String) getQualifierByName("Derives_from").getValues().get(0);
            setQualifier(new Qualifier("Derives_from", duplicatePrefix+derives_from));
          }

          removeQualifierByName("feature_id");
          removeQualifierByName("timelastmodified");
          removeQualifierByName("feature_relationship_rank");
        }
        catch(ReadOnlyException e)
        {
          // best effort: keep the partially renamed copy but record the failure
          logger4j.warn("could not rename duplicated feature", e);
        }
        catch(EntryInformationException e)
        {
          // best effort: keep the partially renamed copy but record the failure
          logger4j.warn("could not rename duplicated feature", e);
        }
      }
    
      /**
       *  Create a new GFFStreamFeature from the given line.  The String should be
       *  in gene finder format: tab separated columns holding, in order, the
       *  sequence name, source, feature key, start, end, score, strand, frame
       *  and (optionally) the attributes.
       *  <p>
       *  A strand of "-" gives a complemented location; "+" and any other value
       *  (unstranded) give a forward location.  A frame of 0, 1 or 2 is stored
       *  as a codon_start qualifier of 1, 2 or 3; any other frame is ignored.
       *  A score of "." is not stored.
       *  @param line one line of GFF
       *  @exception ReadFormatException thrown if the line has fewer than 8
       *    fields, if the start or end is not an integer, if the start is
       *    greater than the end, or if the start is negative.
       **/
      public GFFStreamFeature(final String line)
          throws ReadFormatException 
      {
        super(null);

        final StringVector line_bits = StringVector.getStrings(line, "\t", true);

        if(line_bits.size() < 8) 
          throw new ReadFormatException("invalid GFF line: 8 fields needed " +
                                        "(got " + line_bits.size () +
                                        " fields) from: " + line);

        final String start_base_string = ((String)line_bits.elementAt(3)).trim();
        final String end_base_string   = ((String)line_bits.elementAt(4)).trim();

        final int start_base;
        final int end_base;

        try 
        {
          start_base = Integer.parseInt(start_base_string);
          end_base   = Integer.parseInt(end_base_string);
        } 
        catch(NumberFormatException e)
        {
          throw new ReadFormatException("Could not understand the start or end base " +
                                        "of a GFF feature: " + start_base_string + 
                                        " " + end_base_string);
        }

        // start of qualifier parsing and setting
        try
        {
          // "+" is forward, "-" is complement; anything else must be
          // unstranded and forward is the best we can do
          final boolean complement_flag =
            ((String)line_bits.elementAt(6)).equals("-");

          if(line_bits.size() == 9) 
          {
            final String rest_of_line = (String)line_bits.elementAt(8); 

            // parse the rest of the line as ACeDB format attributes
            final Hashtable attributes = parseAttributes(rest_of_line);

            for(final java.util.Enumeration attribute_enum = attributes.keys();
                attribute_enum.hasMoreElements();)
            {
              final String name = (String)attribute_enum.nextElement();
              final StringVector values = (StringVector)attributes.get(name);

              if(MatchPanel.isClusterTag(name))
              {
                // cluster tags are wrapped so that they are loaded lazily
                final List lazyValues = new Vector();
                for(int i=0; i<values.size(); i++)
                  lazyValues.add(
                      new ClusterLazyQualifierValue( (String)values.get(i), 
                                             this ));

                setQualifier(new QualifierLazyLoading(name, lazyValues));
              }
              else
              {
                if(values.size() == 0)
                  setQualifier(new Qualifier(name));
                else
                  setQualifier(new Qualifier(name, values));
              }
            }
          }

          if( !((String)line_bits.elementAt(0)).equals("null") )
            setGffSeqName( decode((String)line_bits.elementAt(0)) );

          final Key key = new Key((String)line_bits.elementAt(2));
          setKey(key);

          this.setGffSource((String)line_bits.elementAt(1));

          if( !((String)line_bits.elementAt(5)).equals(".") )
          {
            final Qualifier score_qualifier =
              new Qualifier("score", (String)line_bits.elementAt(5));
            setQualifier(score_qualifier);
          }

          // GFF frame is 0-based, codon_start is 1-based
          String frame = (String)line_bits.elementAt(7);

          if(frame.equals ("0"))
            frame = "1";
          else if(frame.equals("1"))
            frame = "2";
          else if(frame.equals("2")) 
            frame = "3";
          else
            frame = ".";

          if(!frame.equals(".")) 
          {
            final Qualifier codon_start_qualifier =
              new Qualifier("codon_start", frame);

            setQualifier(codon_start_qualifier);
          }

          if(start_base > end_base) 
            throw new ReadFormatException("start position is greater than end " +
                                          "position: " + start_base + " > " +
                                          end_base+"\n"+line);

          if(start_base < 0)
            throw new ReadFormatException("start position must be positive: " +
                                          start_base); 

          final Range location_range = new Range(start_base, end_base);
          final RangeVector location_ranges = new RangeVector(location_range);
          setLocation(new Location(location_ranges, complement_flag));
        }
        catch(ReadOnlyException e) 
        {
          throw new Error("internal error - unexpected exception: " + e, e);
        } 
        catch(EntryInformationException e) 
        {
          throw new Error("internal error - unexpected exception: " + e, e);
        } 
        catch(OutOfRangeException e) 
        {
          throw new Error("internal error - unexpected exception: " + e, e);
        }

        this.gff_lines = new StringVector(line);
      }
    
    
    tjc's avatar
    tjc committed
      /**
       *  Set the store for spliced features that holds the ID and the range of
       *  each segment.
       *  @param id_range_store the store to use; the reference is kept, the
       *    table is not copied
       */
      public void setSegmentRangeStore(Hashtable id_range_store)
      {
        this.id_range_store = id_range_store;
      }
    
    
    tjc's avatar
    tjc committed
      public Hashtable getSegmentRangeStore()
      {
    
        if(id_range_store == null)
        {
          id_range_store = new Hashtable();
          id_range_store.put((String)this.getQualifierByName("ID").getValues().get(0), 
                             this.getLocation().getTotalRange());
        }
    
    tjc's avatar
    tjc committed
        return id_range_store;
      }
      
    
      /**
       * Get the record of uniquenames that have been changed.
       * @return the table last passed to setNewIdMapToOldId(), or null if
       *   none has been set
       */
      public Hashtable getNewIdMapToOldId()
      {
        return newIdMapToOldId;
      }
      
      /**
       * Used when changing spliced feature uniquenames.
       * @param newIdMapToOldId record of the changed uniquenames (presumably
       *   keyed by new ID with the old ID as value - confirm against callers);
       *   the reference is kept, the table is not copied
       */
      public void setNewIdMapToOldId(Hashtable newIdMapToOldId)
      {
        this.newIdMapToOldId = newIdMapToOldId;
      }
      
    
    tjc's avatar
    tjc committed
      /**
    
    tjc's avatar
    tjc committed
       * Store for ID's and CHADO feature_relationship.rank
       * @param feature_relationship_rank_store
    
    tjc's avatar
    tjc committed
      public void setFeature_relationship_rank_store(
          Hashtable feature_relationship_rank_store)
      {
        this.feature_relationship_rank_store = feature_relationship_rank_store;
      }
      
      /**
       * Store for ID's and CHADO feature_relationship.rank
       * @return the rank store held by this feature, or null if none has been
       *   set
       */
      public Hashtable getFeature_relationship_rank_store()
      {
        return feature_relationship_rank_store;
      }
    
    tjc's avatar
    tjc committed
      /**
       * Get the chado uniquename 
       * @param r
       * @return
       */
    
    tjc's avatar
    tjc committed
      public String getSegmentID(final Range r)
    
    tjc's avatar
    tjc committed
      {
        if(id_range_store != null)
        {
          Enumeration enum_ranges = id_range_store.keys();
    
    tjc's avatar
    tjc committed
          //Iterator it = id_range_store.values().iterator();
    
    tjc's avatar
    tjc committed
          while(enum_ranges.hasMoreElements())
    
    tjc's avatar
    tjc committed
          //while(it.hasNext())
    
    tjc's avatar
    tjc committed
          {
    
    tjc's avatar
    tjc committed
            String key  = (String)enum_ranges.nextElement();
            Range range = (Range)id_range_store.get(key);
    
    tjc's avatar
    tjc committed
            if(range.getStart() == r.getStart() &&
    
    tjc's avatar
    tjc committed
               range.getEnd()   == r.getEnd())
    
    tjc's avatar
    tjc committed
              return key;
    
    tjc's avatar
    tjc committed
          }
        }
    
    tjc's avatar
    tjc committed
        else if (getQualifierByName("ID") != null)
        {
          return (String)getQualifierByName("ID").getValues().get(0);
        }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        logger4j.warn("RANGE NOT FOUND "+r.toString());
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        return null;
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      /**
       * Get the feature ID based on the segments chado 
       * uniquename's.
       * @param rv
       * @return
       */
    
    tjc's avatar
    tjc committed
      public String getSegmentID(RangeVector rv)
      {
        String id = "";
        if(id_range_store != null)
        {
          String id_new;
          Range range;
    
    tjc's avatar
    tjc committed
          int index;
    
    tjc's avatar
    tjc committed
          for(int i=0; i<rv.size(); i++)
          {
            range  = (Range)rv.get(i);
            id_new = getSegmentID(range);
    
    tjc's avatar
    tjc committed
            
            String prefix[] = getPrefix(id_new, ':');
            if(prefix[0] != null)
            {
              index = id.indexOf(prefix[0]);
              if(id.equals("") || index < 0)
              {
                if(!id.equals(""))
                  id = id +",";
                id = id+prefix[0] + "{" + prefix[1] + "}";
                continue;
              }
              
              index = id.indexOf('}', index);
              id = id.substring(0,index) + "," + 
                   prefix[1] + id.substring(index);
            }
            else if(id_new != null)
    
    tjc's avatar
    tjc committed
            {
              if(!id.equals(""))
                id = id +",";
              id = id+id_new;
            }
          }
        }
    
    tjc's avatar
    tjc committed
        return id;
      }
      
    
    tjc's avatar
    tjc committed
      /**
       * Get the ID prefix, e.g. for SPAC1556.06.1:exon:2
       * returns SPAC1556.06.1:exon as the prefix and 2 as the
       * index.  The ID is split at the last occurrence of the separator.
       * @param id the ID to split; may be null
       * @param separator the character that separates the prefix from the index
       * @return a two element array holding the prefix and the index; both
       *   elements are null if the id is null or does not contain the separator
       */
      public String[] getPrefix(final String id,
                                final char separator)
      {
        final String prefix[] = new String[2];
        if(id == null)
          return prefix;

        final int index = id.lastIndexOf(separator);
        if(index > -1)
        {
          prefix[0] = id.substring(0,index);
          prefix[1] = id.substring(index+1);
        }
        return prefix;
      }
     
      /**
       * Used to automatically generate the number for a new segment ID: finds
       * the lowest number, counting up from 1, for which
       * prefix + separator + number is not already a key in the segment store.
       * @param prefix the ID prefix
       * @param separator the character placed between the prefix and the number
       * @return the first unused number; 1 if there is no segment store
       */
      public int getAutoNumber(final String prefix,
                               final char separator)
      {
        int auto = 1;

        // without a store no ID is in use yet
        if(id_range_store == null)
          return auto;

        while(id_range_store.containsKey(prefix + separator + auto))
          auto++;
        return auto;
      }
      
    
      /**
      *
      * For gff-version 3:
      * http://song.sourceforge.net/gff3-jan04.shtml
      *
      * Remove URL escaping rule (e.g. space="%20" or "+")
      *
      */
    
    tjc's avatar
    tjc committed
      public static String decode(String s)
    
        final String map[][] = {
                                 { " ",  "%20" },  // white space
                                 { ",",  "%2C" },  // comma
                                 { ";",  "%3B" },  // semi-colon
                                 { "=",  "%3D" },  // equals
                                 { "\t", "%09" },  // tab
                                 { " ",  "+"   },  // white space
    
    tjc's avatar
    tjc committed
                                 { "+",  "%2B" },
    
                                 { "(",  "%28" },  // left bracket
    
    tjc's avatar
    tjc committed
                                 { ")",  "%29" }   // right bracket
    
    tjc's avatar
    tjc committed
    
    
        int ind;
        String enc;
        String dec;
    
        for(int i=0; i<map.length; i++)
        {
          enc = map[i][1];
          dec = map[i][0];
          while( (ind = s.indexOf(enc)) > -1)
            s = s.substring(0,ind) + dec + s.substring(ind+enc.length());
        }
    
    tjc's avatar
    tjc committed
    
    
      /**
      *
      * For gff-version 3:
      * http://song.sourceforge.net/gff3-jan04.shtml
      *
      * Add URL escaping rule (e.g. space="%20" or "+")
      *
      */
    
    tjc's avatar
    tjc committed
      public static String encode(String s)
    
    tjc's avatar
    tjc committed
      {
    
        final String map[][] = {
    //                           { " ",  "%20" },  // white space
                                 { ",",  "%2C" },  // comma 
                                 { ";",  "%3B" },  // semi-colon
                                 { "=",  "%3D" },  // equals
                                 { "\t", "%09" },  // tab
    
    tjc's avatar
    tjc committed
                                 { "+",  "%2B" },
    
                                 { " ",  "+"   },  // white space
                                 { "(",  "%28" },  // left bracket
    
    tjc's avatar
    tjc committed
                                 { ")",  "%29" },  // right bracket
                                 { "\n", "%5C" }   // new-line 
    
    tjc's avatar
    tjc committed
    
    
        int ind;
        String enc;
        String dec;
    
        for(int i=0; i<map.length; i++)
        {
          enc = map[i][1];
          dec = map[i][0];
          while( (ind = s.indexOf(dec)) > -1 )
            s = s.substring(0,ind) + enc + s.substring(ind+1);
        }
    
    tjc's avatar
    tjc committed
        return s;
      }
    
    
       
      /**
       *  Return the reference of a new copy of this Feature.
       **/
      public Feature copy() 
      {
        final Feature return_value = new GFFStreamFeature(this);
        return return_value;
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  Read and return a GFFStreamFeature from a stream.  A feature must be the
       *  next thing in the stream.
       *  @param stream the Feature is read from this stream
       *  @exception IOException thrown if there is a problem reading the Feature -
       *    most likely ReadFormatException.
       *  @exception InvalidRelationException Thrown if this Feature cannot contain
       *    the given Qualifier.
       *  @return null if in_stream is at the end of file when the method is
       *    called
       */
    
      protected static GFFStreamFeature readFromStream(LinePushBackReader stream)
          throws IOException, InvalidRelationException 
      {
        String line = stream.readLine();
        if(line == null) 
    
    tjc's avatar
    tjc committed
          return null;
    
    
        try
        {
          final GFFStreamFeature new_feature = new GFFStreamFeature(line);
    
    tjc's avatar
    tjc committed
          return new_feature;
    
        } 
        catch(ReadFormatException exception) 
        {
    
    tjc's avatar
    tjc committed
          // re-throw the exception with the line number added
    
          final String new_error_string = exception.getMessage();
    
    tjc's avatar
    tjc committed
    
    
          throw new ReadFormatException(new_error_string,
                                        stream.getLineNumber());
    
    tjc's avatar
    tjc committed
        }
      }
    
      /**
       *  Read the details of a feature from an EMBL stream into the current
       *  object.
       *  @param entry_information The EntryInformation object of the Entry that
       *    will contain the Feature.
       *  @param in_stream the Feature is read from this stream
       *  @exception IOException thrown if there is a problem reading the Feature -
       *    most likely ReadFormatException if the stream does not contain GFF
       *    feature.
       **/
    
      public void setFromStream(final EntryInformation entry_information,
                                final LinePushBackReader in_stream)
          throws IOException, InvalidRelationException, ReadOnlyException 
      {
        throw new ReadOnlyException();
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      /**
       *  Ranges keyed by GFF sequence name.  When writing a feature whose
       *  sequence name is a key here, the start of the stored Range is used to
       *  offset the written start and end.  May be null.
       **/
      protected static Hashtable contig_ranges;
    
    
    tjc's avatar
    tjc committed
      /**
       *  Write this Feature to the given stream.
       *  @param writer The stream to write to.
       *  @exception IOException thrown if there is an io problem while writing
       *    the Feature.
       **/
    
      public void writeToStream(final Writer writer)
          throws IOException 
      {
    
    tjc's avatar
    tjc committed
        final RangeVector ranges = getLocation().getRanges();
        final int ranges_size = ranges.size();
    
    tjc's avatar
    tjc committed
    //  final Hashtable contig_ranges = SimpleDocumentEntry.getContigRanges();
    
    
    tjc's avatar
    tjc committed
        for(int i = 0; i < ranges_size; ++i) 
        {
    
    tjc's avatar
    tjc committed
          Range this_range = (Range)ranges.elementAt(i);
    
          String seqname = getGffSeqName();
          String source  = getGffSource();
    
    tjc's avatar
    tjc committed
          Qualifier score   = getQualifierByName("score");
          Qualifier group   = getQualifierByName("group");
    
          // source becomes a Dbxref in chado
    
    tjc's avatar
    tjc committed
          String source_str = null;
          if(getQualifierByName("Dbxref") != null)
          {
            source_str = getDbxrefGFFSource(getQualifierByName("Dbxref"));
          }
          
          if(seqname == null && ((GFFDocumentEntry)getEntry()).getDocument() != null) 
    
            seqname = ((GFFDocumentEntry)getEntry()).getDocument().getName();
    
    tjc's avatar
    tjc committed
          if(seqname == null)
    
            seqname = "gff_seqname";
    
    tjc's avatar
    tjc committed
          if(source == null) 
    
    tjc's avatar
    tjc committed
          if(score == null) 
    
    tjc's avatar
    tjc committed
            score = new Qualifier("score", ".");
    
    tjc's avatar
    tjc committed
          int start = this_range.getStart();
          int end   = this_range.getEnd();
          if(seqname != null && contig_ranges != null &&
    
             contig_ranges.containsKey(seqname))
    
    tjc's avatar
    tjc committed
          {
    
            Range offset_range = (Range)contig_ranges.get(seqname);
    
    tjc's avatar
    tjc committed
            start = start-offset_range.getStart()+1;
            end   = end-offset_range.getStart()+1;
          }
    
    
    tjc's avatar
    tjc committed
          if(group == null || group.getValues() == null ||
             group.getValues().elementAt(0).equals(""))
          {
            final Qualifier gene = getQualifierByName("gene");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            if(gene == null) 
              group = new Qualifier("group", "");
            else 
              group = gene;
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          String frame = ".";
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          final Qualifier codon_start = getQualifierByName("codon_start");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(codon_start != null && i == 0) 
          {
    
    tjc's avatar
    tjc committed
            frame = (String)(codon_start.getValues()).elementAt(0);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            if(frame.equals ("1")) 
              frame = "0";
            else if(frame.equals("2"))
              frame = "1";
            else if(frame.equals("3"))
              frame = "2";
            else
              frame = ".";
    
    tjc's avatar
    tjc committed
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          final String myId = getSegmentID(this_range);
          final String attribute_string = unParseAttributes(myId);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(source_str == null && source != null)
    
    tjc's avatar
    tjc committed
          String key = getKey().getKeyString();
          if(key.equals("CDS"))
            key = "exon";
    
          writer.write(seqname + "\t" +
    
                       source_str + "\t" +
    
    tjc's avatar
    tjc committed
                       key + "\t" +
    
    tjc's avatar
    tjc committed
                       start + "\t" +
                       end + "\t" +
    
    tjc's avatar
    tjc committed
                       score.getValues() .elementAt(0)+ "\t" +
                       (getLocation().isComplement() ? "-\t" : "+\t") +
                       frame + "\t" +
                       attribute_string + "\n");
        }
    
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      /**
       * Get the GFF_source value of a Dbxref qualifier.
       * @param qualifier
       * @return  the gff_source value or NULL
       */
    
    tjc's avatar
    tjc committed
      /*
    
      private String getDbxrefGFFSource(final Qualifier qualifier)
      {
        StringVector qualifier_strings =
          StreamQualifier.toStringVector(null, qualifier);
        
        for(int i=0; i<qualifier_strings.size(); i++)
        {
          String qualifier_string = (String)qualifier_strings.elementAt(i);
          
          if(qualifier_string.indexOf("GFF_source:") >-1)
          {
            int index = qualifier_string.indexOf(":")+1;
            int len = qualifier_string.length();
            if(qualifier_string.endsWith("\""))
              len--;
            return qualifier_string.substring(index, len);
          }
        }
        return null;
      }
    
    tjc's avatar
    tjc committed
      */
    
    tjc's avatar
    tjc committed
      /**
       *  Return a String containing the qualifiers of this feature in a form
       *  suitable for using as the last field of a GFF line.  The codon_start
       *  attribute is not included since GFF has a frame field.  gff_seqname,
       *  gff_source and score aren't included since they have corresponding
       *  fields.
       **/
    
    tjc's avatar
    tjc committed
      private String unParseAttributes(final String myId) 
    
      {
        final StringBuffer buffer = new StringBuffer();
        final QualifierVector qualifiers = getQualifiers();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        final String names[] = { "ID", "Name", "Alias", "Parent",
    
    tjc's avatar
    tjc committed
                                 "Derives_from",
    
    tjc's avatar
    tjc committed
                                 "Target", "Gap", "Note", 
                                 "Dbxref", "Ontology_term" };
        int count = 0;
    
    tjc's avatar
    tjc committed
        Qualifier this_qualifier;
    
    tjc's avatar
    tjc committed
        final int names_length = names.length;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        if(myId != null)
        {
          buffer.append("ID=");
          buffer.append(encode(myId));
          count++;
        }
        
        for(int i=1; i<names_length; i++)
    
    tjc's avatar
    tjc committed
          this_qualifier = (Qualifier)qualifiers.getQualifierByName(names[i]);
    
    tjc's avatar
    tjc committed
     
          if(this_qualifier == null)
            continue;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          final String this_qualifier_str = getQualifierString(this_qualifier);
    
    tjc's avatar
    tjc committed
          if(this_qualifier_str == null)
            continue;
    
          if(count != 0)
            buffer.append(";");
          buffer.append(this_qualifier_str);
          count++;
        }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        boolean lname;
    
    tjc's avatar
    tjc committed
        final int qualifiers_size = qualifiers.size();
    
        for(int i = 0; i < qualifiers_size; i++) 
    
    tjc's avatar
    tjc committed
        {
          this_qualifier = (Qualifier)qualifiers.elementAt(i);
    
    
    tjc's avatar
    tjc committed
          lname = false;
    
    tjc's avatar
    tjc committed
          for(int j=0; j<names_length; j++)
    
            if(this_qualifier.getName().equals(names[j]))
    
    tjc's avatar
    tjc committed
              lname = true;
    
          if(lname)
            continue;
    
          
          String this_qualifier_str = getQualifierString(this_qualifier);
          
          if(this_qualifier_str == null)
            continue;
          
    
    tjc's avatar
    tjc committed
          if(count != 0)
    
    tjc's avatar
    tjc committed
            buffer.append(";");
    
    tjc's avatar
    tjc committed
          buffer.append(this_qualifier_str);
        }
    
        return buffer.toString();
      }
    
    
      /**
       * Used to write out the GFF attributes.
       * @param q the qualifier to represent as a <code>String</code>
       * @return  the <code>String</code> representation
       */
    
    tjc's avatar
    tjc committed
      private String getQualifierString(Qualifier q)
      {
        StringBuffer buffer = new StringBuffer();
        final String name = q.getName();
    
        if(name.equals("codon_start") || name.equals("gff_source") ||
           name.equals("gff_seqname") || name.equals("score"))
          return null;
    
    tjc's avatar
    tjc committed
        final StringVector values = q.getValues();
        buffer.append(encode(name));
    
    tjc's avatar
    tjc committed
        if(values != null)
        {
    
          buffer.append('=');
    
    tjc's avatar
    tjc committed
          for(int value_index = 0; value_index < values.size();
              ++value_index)
    
    tjc's avatar
    tjc committed
            final String this_value;
            if(name.equals("class"))
            {
              int index = ((String)values.elementAt(value_index)).indexOf("::");
              if(index > -1)
                this_value = encode(((String)values.elementAt(value_index)).substring(0,index));
              else
                this_value = encode((String)values.elementAt(value_index));
            }
            else
              this_value = encode((String)values.elementAt(value_index));
            
    
            if(value_index>0)
              buffer.append("%2C");
    
    tjc's avatar
    tjc committed
            try
            {
              buffer.append(Integer.valueOf(this_value));
            }
            catch(NumberFormatException _)
    
    tjc's avatar
    tjc committed
              // not an integer
              try
    
    tjc's avatar
    tjc committed
                buffer.append(Double.valueOf(this_value));
              }
              catch (NumberFormatException __)
    
    tjc's avatar
    tjc committed
                // not a double or integer so quote it
    
                buffer.append(this_value);
    
    tjc's avatar
    tjc committed
              }
            }
          }
        }
    
        return buffer.toString();
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  Parse the given String as ACeDB format attributes.
       *  Adapted from code by Matthew Pocock for the BioJava project.
    
       *
       *  Modified for gff-version 3.
       *
    
    tjc's avatar
    tjc committed
       *  @return Return a Hashtable.  Each key is an attribute name and each value
       *    of the Hashtable is a StringVector containing the attribute values.
       *    If the attribute has no value then the Hashtable value will be a zero
       *    length vector.
       **/
    
      private Hashtable parseAttributes(final String att_val_list) 
      {
        Hashtable attributes = new Hashtable();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
    //  StringTokenizer tokeniser = new StringTokenizer(att_val_list, ";", false);
    //  while(tokeniser.hasMoreTokens()) 
    //  {
    //    final String this_token = tokeniser.nextToken().trim();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        int ind_start = 0;
        int ind_end;
        while( (ind_end = att_val_list.indexOf(";",ind_start)) > -1 || 
               ind_start < att_val_list.length() )