Skip to content
Snippets Groups Projects
GFFStreamFeature.java 39.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* GFFStreamFeature.java
     *
     * created: Tue Sep 14 1999
     *
     * This file is part of Artemis
     *
     * Copyright (C) 1999,2000,2001  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
    
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFStreamFeature.java,v 1.72 2009-08-28 10:33:12 tjc Exp $
    
    tjc's avatar
    tjc committed
     */
    
    package uk.ac.sanger.artemis.io;
    
    import java.util.Hashtable;
    
    import java.util.HashSet;
    
    tjc's avatar
    tjc committed
    import java.util.Enumeration;
    
    tjc's avatar
    tjc committed
    import java.util.List;
    
    import java.util.Set;
    
    import java.util.StringTokenizer;
    
    tjc's avatar
    tjc committed
    import java.util.Vector;
    
    tjc's avatar
    tjc committed
    import java.io.IOException;
    import java.io.Writer;
    
    import java.sql.Timestamp;
    import java.text.SimpleDateFormat;
    
    tjc's avatar
    tjc committed
    
    
    import uk.ac.sanger.artemis.Options;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.chado.ClusterLazyQualifierValue;
    
    import uk.ac.sanger.artemis.components.genebuilder.GeneUtils;
    
    import uk.ac.sanger.artemis.components.genebuilder.ProteinMapPanel;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.components.genebuilder.ortholog.MatchPanel;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.util.LinePushBackReader;
    import uk.ac.sanger.artemis.util.OutOfRangeException;
    import uk.ac.sanger.artemis.util.ReadOnlyException;
    import uk.ac.sanger.artemis.util.StringVector;
    
    import uk.ac.sanger.artemis.io.GFF3Encoder;
    
    tjc's avatar
    tjc committed
    
    /**
    
     * A StreamFeature that thinks it is a GFF feature.
     * 
     * @author Kim Rutherford
    
    tjc's avatar
    tjc committed
     **/
    
    public class GFFStreamFeature extends SimpleDocumentFeature implements
        DocumentFeature, StreamFeature, ComparableFeature {
    
    tjc's avatar
    tjc committed
    
    
      private static org.apache.log4j.Logger    logger4j        = org.apache.log4j.Logger
                                                                    .getLogger(GFFStreamFeature.class);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      /** store for spliced features containing id and range of each segment */
    
      private Hashtable<String, Range>          id_range_store;
    
      /** store a record of the new and old uniquenames that have been changed */
    
      private Hashtable<String, String>         newIdMapToOldId;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      /** store the Timestamp for the feature */
    
      private Timestamp                         timelastmodified;
    
      private ChadoCanonicalGene                chadoGene;
    
      private boolean                           visible         = true;
    
      /** combined feature_relationship.rank store for exons */
    
      private Hashtable<String, Integer>        feature_relationship_rank_store;
    
      /** first tabbed parameter */
      private String                            gffSeqName;
    
      /** second tabbed parameter */
    
      private String                            gffSource;
    
    tjc's avatar
    tjc committed
      /** duplication count */
    
      private short                             duplicate       = 0;
    
    tcarver's avatar
    tcarver committed
      protected static Hashtable<String, Range> contig_ranges;
    
      private boolean                           lazyLoaded      = false;
      private org.gmod.schema.sequence.Feature  chadoLazyFeature;
      private boolean                           readOnlyFeature = false;
    
      private static Set<String>                attrs_to_filter = new HashSet<String>();
    
      /**
       * Registers an attribute not to be included in the GFF3 output for
       * GFFStreamFeatures
       * 
       * @param attr
       *          The GFF3 attribute to remove
       */
    
      public static void removeAttribute(final String attr) {
        // remember the name in the shared filter set
        attrs_to_filter.add(attr);
      }
    
      /**
    
       * Registers an attribute to be included in the GFF3 output for
       * GFFStreamFeatures
       * 
       * @param attr
       *          The GFF3 attribute to include
    
      public static void includeAttribute(String attr) {
    
        attrs_to_filter.remove(attr);
      }
    
    
    tjc's avatar
    tjc committed
      /**
       * Create a new GFFStreamFeature object. The feature should be added to an
       * Entry (with Entry.add()).
       * <p>
       * If the qualifiers do not contain an "ID", one is generated: the value of
       * a systematic-name qualifier (names taken from
       * Options.getSystematicQualifierNames()) when the feature has one,
       * otherwise the key string and location joined by ':'.
       * 
       * @param key
       *          The new feature key
       * @param location
       *          The Location object for the new feature
       * @param qualifiers
       *          The qualifiers for the new feature
       **/
      public GFFStreamFeature(final Key key, final Location location,
          final QualifierVector qualifiers) {
        super(null);

        try {
          setKey(key);
          setLocation(location);
          setQualifiers(qualifiers);

          if (getQualifierByName("ID") == null) {
            String idStr = null;
            final StringVector v = Options.getOptions().getSystematicQualifierNames();

            for (int i = 0; i < v.size(); i++) {
              final Qualifier sysQualifier = getQualifierByName((String) v.get(i));
              if (sysQualifier != null) {
                idStr = (String) sysQualifier.getValues().get(0);
                // NOTE(review): the lines closing this loop were missing from the
                // reviewed text; stopping at the first match is assumed - confirm.
                break;
              }
            }

            // fall back to an autogenerated ID
            if (idStr == null)
              idStr = key.getKeyString() + ":" + location.toString();

            setQualifier(new Qualifier("ID", idStr));
          }
        } catch (EntryInformationException e) {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        } catch (ReadOnlyException e) {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        } catch (OutOfRangeException e) {
          // this should never happen because the feature will not be in an Entry
          throw new Error("internal error - unexpected exception: " + e, e);
        }
      }
    
    
      /**
       * Create a new GFFStreamFeature with the same key, location and qualifiers
       * as the given feature. Delegates to the two-argument constructor with
       * isDuplicatedInChado set to false.
       * 
       * @param feature
       *          The feature to copy.
       **/
      public GFFStreamFeature(final Feature feature) {
        this(feature, false);
      }
    
    tjc's avatar
    tjc committed
      /**
       * Create a new GFFStreamFeature with the same key, location and qualifiers as
       * the given feature. The feature should be added to an Entry (with
       * Entry.add()).
       * <p>
       * When the source is itself a GFFStreamFeature its segment and rank stores
       * are cloned and its GFF seqname/source are copied. If isDuplicatedInChado
       * is true the source's duplicate counter is incremented and the ID, Parent
       * and Derives_from values (and the segment store keys) are prefixed with
       * "DUP&lt;count&gt;-"; qualifiers that must not be carried over to a
       * duplicate are removed. Otherwise the source's chadoGene is shared.
       * 
       * @param feature
       *          The feature to copy.
       * @param isDuplicatedInChado
       *          true if the copy is a new, renamed duplicate of the source.
       **/
      @SuppressWarnings("unchecked")
      public GFFStreamFeature(final Feature feature,
          final boolean isDuplicatedInChado) {
        this(feature.getKey(), feature.getLocation(), feature.getQualifiers());

        if (!(feature instanceof GFFStreamFeature))
          return;

        final GFFStreamFeature gffFeature = (GFFStreamFeature) feature;

        if (gffFeature.id_range_store != null)
          this.id_range_store = (Hashtable<String, Range>) gffFeature.id_range_store
              .clone();

        if (gffFeature.feature_relationship_rank_store != null)
          this.feature_relationship_rank_store = (Hashtable<String, Integer>) gffFeature.feature_relationship_rank_store
              .clone();

        this.setGffSeqName(gffFeature.getGffSeqName());
        this.setGffSource(gffFeature.getGffSource());

        if (!isDuplicatedInChado) {
          chadoGene = gffFeature.chadoGene;
          return;
        }

        try {
          gffFeature.duplicate++;
          final String duplicatePrefix = "DUP"
              + Short.toString(gffFeature.duplicate) + "-";
          final String uniquename;

          if (id_range_store != null) {
            // re-key the (cloned) segment store with the duplicate prefix
            final Hashtable<String, Range> new_id_range_store = new Hashtable<String, Range>(
                id_range_store.size());
            final Enumeration<String> enumIdRangeStore = id_range_store.keys();

            while (enumIdRangeStore.hasMoreElements()) {
              final String keyId = enumIdRangeStore.nextElement();
              new_id_range_store.put(duplicatePrefix + keyId,
                  id_range_store.get(keyId));
            }
            this.id_range_store = new_id_range_store;

            if (getLocation().getRanges().size() > 1)
              uniquename = getSegmentID(getLocation().getRanges());
            else {
              final String currentId = (String) getQualifierByName("ID")
                  .getValues().get(0);

              if (currentId.endsWith("}"))
                uniquename = id_range_store.keys().nextElement();
              else
                uniquename = duplicatePrefix + currentId;
            }
          } else
            uniquename = duplicatePrefix
                + (String) getQualifierByName("ID").getValues().get(0);

          setQualifier(new Qualifier("ID", uniquename));

          if (getQualifierByName("Parent") != null) {
            final String parent = (String) getQualifierByName("Parent")
                .getValues().get(0);
            setQualifier(new Qualifier("Parent", duplicatePrefix + parent));
          }

          if (getQualifierByName("Derives_from") != null) {
            final String derives_from = (String) getQualifierByName(
                "Derives_from").getValues().get(0);
            setQualifier(new Qualifier("Derives_from", duplicatePrefix
                + derives_from));
          }

          // remove qualifiers that don't get transferred to duplicate
          // NOTE(review): the original list named MatchPanel.ORTHOLOG twice;
          // the second entry may have been meant for another tag - confirm.
          final String removeQualifierNames[] = { "feature_id",
              "timelastmodified", "feature_relationship_rank",
              ProteinMapPanel.POLYPEPTIDE_DOMAIN, ProteinMapPanel.TMHMM[0],
              ProteinMapPanel.TMHMM[1], ProteinMapPanel.TMHMM[2],
              ProteinMapPanel.TMHMM[3], MatchPanel.ORTHOLOG };

          for (int i = 0; i < removeQualifierNames.length; i++)
            removeQualifierByName(removeQualifierNames[i]);
        } catch (ReadOnlyException e) {
          // best effort: the copy is kept, but the failure is no longer silent
          logger4j.warn("could not rename duplicated feature", e);
        } catch (EntryInformationException e) {
          logger4j.warn("could not rename duplicated feature", e);
        }
      }
    
      /**
       * Create a new GFFStreamFeature from the given line. The String should be in
       * gene finder format: tab separated with at least 8 fields (seqname, source,
       * key, start, end, score, strand, frame) and an optional ninth attributes
       * field.
       * <p>
       * A score other than "." becomes a "score" qualifier; a frame of 0, 1 or 2
       * becomes a "codon_start" qualifier of 1, 2 or 3. Any strand other than
       * "-" is read as the forward strand.
       * 
       * @param line
       *          one line of GFF
       * @exception ReadFormatException
       *              if there are fewer than 8 fields, the start or end is not an
       *              integer, start is greater than end, or start is negative.
       **/
      public GFFStreamFeature(final String line) throws ReadFormatException {
        super(null);

        final StringVector line_bits = StringVector.getStrings(line, "\t", true);

        if (line_bits.size() < 8)
          throw new ReadFormatException("invalid GFF line: 8 fields needed "
              + "(got " + line_bits.size() + " fields) from: " + line);

        final String start_base_str = line_bits.elementAt(3).trim();
        final String end_base_str = line_bits.elementAt(4).trim();

        final int start_base;
        final int end_base;

        try {
          start_base = Integer.parseInt(start_base_str);
          end_base = Integer.parseInt(end_base_str);
        } catch (NumberFormatException e) {
          throw new ReadFormatException(
              "Could not understand the start or end base " + "of a GFF feature: "
                  + start_base_str + " " + end_base_str);
        }

        // start of qualifier parsing and setting
        try {
          // "+" and anything unrecognised (unstranded) are both forward
          final boolean complement_flag = line_bits.elementAt(6).equals("-");

          if (line_bits.size() == 9) {
            final String rest_of_line = line_bits.elementAt(8);
            final Hashtable<String, StringVector> attributes = parseAttributes(rest_of_line);

            for (final Enumeration<String> attribute_enum = attributes.keys(); attribute_enum
                .hasMoreElements();) {
              final String name = attribute_enum.nextElement();
              final StringVector values = attributes.get(name);

              if (MatchPanel.isClusterTag(name)) {
                final List<ClusterLazyQualifierValue> lazyValues = new Vector<ClusterLazyQualifierValue>();

                for (int i = 0; i < values.size(); i++)
                  lazyValues.add(new ClusterLazyQualifierValue((String) values
                      .get(i), name, this));

                setQualifier(new QualifierLazyLoading(name, lazyValues));
              } else if (values.size() == 0)
                setQualifier(new Qualifier(name));
              else
                setQualifier(new Qualifier(name, values));
            }
          }

          if (!line_bits.elementAt(0).equals("null"))
            setGffSeqName(GFF3Encoder.decode(line_bits.elementAt(0)));

          setKey(new Key(line_bits.elementAt(2)));
          setGffSource(line_bits.elementAt(1));

          if (!line_bits.elementAt(5).equals("."))
            setQualifier(new Qualifier("score", line_bits.elementAt(5)));

          // GFF frame is 0-based, codon_start is 1-based
          final String gffFrame = line_bits.elementAt(7);
          final String codonStart;

          if (gffFrame.equals("0"))
            codonStart = "1";
          else if (gffFrame.equals("1"))
            codonStart = "2";
          else if (gffFrame.equals("2"))
            codonStart = "3";
          else
            codonStart = null;

          if (codonStart != null)
            setQualifier(new Qualifier("codon_start", codonStart));

          if (start_base > end_base)
            throw new ReadFormatException("start position is greater than end "
                + "position: " + start_base + " > " + end_base + "\n" + line);

          if (start_base < 0)
            throw new ReadFormatException("start position must be positive: "
                + start_base);

          final Range location_range = new Range(start_base, end_base);
          final RangeVector location_ranges = new RangeVector(location_range);
          setLocation(new Location(location_ranges, complement_flag));
        } catch (ReadOnlyException e) {
          throw new Error("internal error - unexpected exception: " + e, e);
        } catch (EntryInformationException e) {
          throw new Error("internal error - unexpected exception: " + e, e);
        } catch (OutOfRangeException e) {
          throw new Error("internal error - unexpected exception: " + e, e);
        }
      }
    
    tjc's avatar
    tjc committed
      /**
    
       * 
       * Store for spliced regions of segments ID's and ranges.
       * 
       */
      public void setSegmentRangeStore(Hashtable<String, Range> id_range_store) {
    
    tjc's avatar
    tjc committed
        this.id_range_store = id_range_store;
      }
    
    
      public Hashtable<String, Range> getSegmentRangeStore() {
        if (id_range_store == null) {
    
          id_range_store = new Hashtable<String, Range>();
    
          id_range_store.put((String) this.getQualifierByName("ID").getValues()
              .get(0), this.getLocation().getTotalRange());
    
    tjc's avatar
    tjc committed
        return id_range_store;
      }
    
      public Hashtable<String, String> getNewIdMapToOldId() {
    
      /**
       * Used when changing spliced feature uniquenames
    
      public void setNewIdMapToOldId(Hashtable<String, String> newIdMapToOldId) {
    
        this.newIdMapToOldId = newIdMapToOldId;
      }
    
    tjc's avatar
    tjc committed
      /**
    
    tjc's avatar
    tjc committed
       * Store for ID's and CHADO feature_relationship.rank
    
    tjc's avatar
    tjc committed
       * @param feature_relationship_rank_store
    
    tjc's avatar
    tjc committed
      public void setFeature_relationship_rank_store(
    
          Hashtable<String, Integer> feature_relationship_rank_store) {
    
    tjc's avatar
    tjc committed
        this.feature_relationship_rank_store = feature_relationship_rank_store;
      }
    
    tjc's avatar
    tjc committed
      /**
       * Store for ID's and CHADO feature_relationship.rank
    
    tjc's avatar
    tjc committed
       * @return
       */
    
      public Hashtable<String, Integer> getFeature_relationship_rank_store() {
    
    tjc's avatar
    tjc committed
        return feature_relationship_rank_store;
      }
    
    tjc's avatar
    tjc committed
      /**
    
       * Get the chado uniquename
    
    tjc's avatar
    tjc committed
       * @param r
       * @return
       */
    
      public String getSegmentID(final Range r) {
        if (id_range_store != null) {
    
          int offset = 0;
    
          if (getGffSeqName() != null && contig_ranges != null
              && contig_ranges.containsKey(getGffSeqName())) {
    
            // adjust for coordinates in multi-sequence GFF
            Range offset_range = contig_ranges.get(getGffSeqName());
    
            offset = offset_range.getStart() - 1;
    
          Enumeration<String> enum_ranges = id_range_store.keys();
    
          while (enum_ranges.hasMoreElements()) {
            String key = enum_ranges.nextElement();
    
            Range range = id_range_store.get(key);
    
            if (range.getStart() == r.getStart() - offset
                && range.getEnd() == r.getEnd() - offset)
    
    tjc's avatar
    tjc committed
              return key;
    
    tjc's avatar
    tjc committed
          }
    
        } else if (getQualifierByName("ID") != null) {
          return (String) getQualifierByName("ID").getValues().get(0);
    
    tjc's avatar
    tjc committed
        }
    
    tjc's avatar
    tjc committed
    
    
        logger4j.warn("RANGE NOT FOUND " + r.toString());
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        return null;
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      /**
    
       * Get the feature ID based on the segments chado uniquename's.
       * 
    
    tjc's avatar
    tjc committed
       * @param rv
       * @return
       */
    
      public String getSegmentID(final RangeVector rv) {
    
    tjc's avatar
    tjc committed
        String id = "";
    
        if (id_range_store != null) {
    
    tjc's avatar
    tjc committed
          String id_new;
          Range range;
    
    tjc's avatar
    tjc committed
          int index;
    
          for (int i = 0; i < rv.size(); i++) {
            range = (Range) rv.get(i);
    
    tjc's avatar
    tjc committed
            id_new = getSegmentID(range);
    
    tjc's avatar
    tjc committed
            String prefix[] = getPrefix(id_new, ':');
    
            if (prefix[0] != null) {
    
    tjc's avatar
    tjc committed
              index = id.indexOf(prefix[0]);
    
              if (id.equals("") || index < 0) {
                if (!id.equals(""))
                  id = id + ",";
                id = id + prefix[0] + "{" + prefix[1] + "}";
    
    tjc's avatar
    tjc committed
                continue;
              }
    
    tjc's avatar
    tjc committed
              index = id.indexOf('}', index);
    
              id = id.substring(0, index) + "," + prefix[1] + id.substring(index);
            } else if (id_new != null) {
              if (!id.equals(""))
                id = id + ",";
              id = id + id_new;
    
    tjc's avatar
    tjc committed
        return id;
      }
    
    tjc's avatar
    tjc committed
      /**
    
       * Get the ID prefix, e.g. for SPAC1556.06.1:exon:2 returns SPAC1556.06.1:exon
       * as the prefix and 2 as the index.
       * 
    
    tjc's avatar
    tjc committed
       * @param id
       * @return
       */
    
      public String[] getPrefix(final String id, final char separator) {
    
    tjc's avatar
    tjc committed
        String prefix[] = new String[2];
        int index = id.lastIndexOf(separator);
    
    
        if (index > -1) {
          prefix[0] = id.substring(0, index);
          prefix[1] = id.substring(index + 1);
    
    tjc's avatar
    tjc committed
        }
        return prefix;
      }
    
    tjc's avatar
    tjc committed
      /**
       * Used to automatically generate
    
    tjc's avatar
    tjc committed
       * @param prefix
       * @return
       */
    
      public int getAutoNumber(final String prefix, final char separator) {
        int auto = 1;
    
    tjc's avatar
    tjc committed
        String val = prefix + separator + auto;
    
        while (id_range_store.containsKey(val)) {
    
    tjc's avatar
    tjc committed
          auto++;
          val = prefix + separator + auto;
        }
        return auto;
      }
    
      /**
       * Return the reference of a new copy of this Feature.
       **/
    
      public Feature copy() {
    
        final Feature return_value = new GFFStreamFeature(this);
        return return_value;
    
    tjc's avatar
    tjc committed
      }
    
      /**
    
       * Read and return a GFFStreamFeature from a stream. A feature must be the
       * next thing in the stream.
       * 
       * @param stream
       *          the Feature is read from this stream
       * @exception IOException
       *              thrown if there is a problem reading the Feature - most likely
       *              ReadFormatException.
       * @exception InvalidRelationException
       *              Thrown if this Feature cannot contain the given Qualifier.
       * @return null if in_stream is at the end of file when the method is called
    
    tjc's avatar
    tjc committed
       */
    
      protected static GFFStreamFeature readFromStream(LinePushBackReader stream)
    
          throws IOException, InvalidRelationException {
    
    tcarver's avatar
    tcarver committed
        final String line = stream.readLine();
    
        if (line == null)
    
    tjc's avatar
    tjc committed
          return null;
    
    
        try {
    
    tcarver's avatar
    tcarver committed
          return new GFFStreamFeature(line);
    
        } catch (ReadFormatException exception) {
    
    tjc's avatar
    tjc committed
          // re-throw the exception with the line number added
    
          final String new_error_string = exception.getMessage();
    
    tjc's avatar
    tjc committed
    
    
          throw new ReadFormatException(new_error_string, stream.getLineNumber());
    
    tjc's avatar
    tjc committed
        }
      }
    
      /**
    
       * Read the details of a feature from an EMBL stream into the current object.
       * 
       * @param entry_information
       *          The EntryInformation object of the Entry that will contain the
       *          Feature.
       * @param in_stream
       *          the Feature is read from this stream
       * @exception IOException
       *              thrown if there is a problem reading the Feature - most likely
       *              ReadFormatException if the stream does not contain GFF
       *              feature.
    
    tjc's avatar
    tjc committed
       **/
    
      public void setFromStream(final EntryInformation entry_information,
    
          final LinePushBackReader in_stream) throws IOException,
          InvalidRelationException, ReadOnlyException {
    
        throw new ReadOnlyException();
    
    tjc's avatar
    tjc committed
      }
    
      /**
    
       * Write this Feature to the given stream.
       * 
       * @param writer
       *          The stream to write to.
       * @exception IOException
       *              thrown if there is an io problem while writing the Feature.
    
    tjc's avatar
    tjc committed
       **/
    
      public void writeToStream(final Writer writer) throws IOException {
    
    tjc's avatar
    tjc committed
        final RangeVector ranges = getLocation().getRanges();
        final int ranges_size = ranges.size();
    
    tjc's avatar
    tjc committed
    
    
        // final Hashtable contig_ranges = SimpleDocumentEntry.getContigRanges();
        for (int i = 0; i < ranges_size; ++i) {
          Range this_range = (Range) ranges.elementAt(i);
    
          String seqname = getGffSeqName();
    
          String source = getGffSource();
          Qualifier score = getQualifierByName("score");
          Qualifier group = getQualifierByName("group");
    
          // source becomes a Dbxref in chado
    
    tjc's avatar
    tjc committed
          String source_str = null;
    
          if (getQualifierByName("Dbxref") != null) {
    
    tjc's avatar
    tjc committed
            source_str = getDbxrefGFFSource(getQualifierByName("Dbxref"));
          }
    
    tcarver's avatar
    tcarver committed
          int start = this_range.getStart();
    
          int end = this_range.getEnd();
    
          if (seqname == null
              && ((GFFDocumentEntry) getEntry()).getDocument() != null)
            seqname = ((GFFDocumentEntry) getEntry()).getDocument().getName();
          if (seqname == null)
    
    tcarver's avatar
    tcarver committed
            seqname = deriveSeqName(start);
    
          if (source == null)
    
          if (score == null)
    
    tjc's avatar
    tjc committed
            score = new Qualifier("score", ".");
    
          if (seqname != null && contig_ranges != null
              && contig_ranges.containsKey(seqname)) {
    
    tcarver's avatar
    tcarver committed
            Range offset_range = contig_ranges.get(seqname);
    
            start = start - offset_range.getStart() + 1;
            end = end - offset_range.getStart() + 1;
    
    tjc's avatar
    tjc committed
          }
    
    
          if (group == null || group.getValues() == null
              || group.getValues().elementAt(0).equals("")) {
    
    tjc's avatar
    tjc committed
            final Qualifier gene = getQualifierByName("gene");
    
    tjc's avatar
    tjc committed
    
    
            if (gene == null)
    
    tjc's avatar
    tjc committed
              group = new Qualifier("group", "");
    
    tjc's avatar
    tjc committed
              group = gene;
          }
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          final Qualifier codon_start = getQualifierByName("codon_start");
    
    tjc's avatar
    tjc committed
    
    
          if (codon_start != null) {
            frame = (String) (codon_start.getValues()).elementAt(0);
    
    tjc's avatar
    tjc committed
    
    
            if (frame.equals("1"))
    
    tjc's avatar
    tjc committed
              frame = "0";
    
            else if (frame.equals("2"))
    
    tjc's avatar
    tjc committed
              frame = "1";
    
            else if (frame.equals("3"))
    
    tjc's avatar
    tjc committed
              frame = "2";
            else
              frame = ".";
    
    tjc's avatar
    tjc committed
          }
    
          // phase is REQUIRED for all CDS features
    
          if (getKey().equals("CDS") && frame.equals("."))
    
            frame = "0";
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          final String myId = getSegmentID(this_range);
    
          String attribute_string = unParseAttributes(myId);
    
    tjc's avatar
    tjc committed
    
    
          if (source_str == null && source != null)
            source_str = source;
    
    tcarver's avatar
    tcarver committed
          final String translation = getTranslation();
    
          if (translation != null)
    
            attribute_string = attribute_string + ";" + translation;
    
          writer.write(seqname + "\t" + source_str + "\t" + getKey().getKeyString()
              + "\t" + start + "\t" + end + "\t" + score.getValues().elementAt(0)
              + "\t" + (getLocation().isComplement() ? "-\t" : "+\t") + frame
              + "\t" + attribute_string + "\n");
    
    tjc's avatar
    tjc committed
        }
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
      /**
    
       * If the seqname is not set for this feature try to derive the
       * contig/chromosome it is located on
       * 
    
    tcarver's avatar
    tcarver committed
       * @param start
       * @return
    
    tjc's avatar
    tjc committed
       */
    
      private String deriveSeqName(int start) {
    
    tcarver's avatar
    tcarver committed
        String seqname = null;
    
        if (contig_ranges != null) {
    
    tcarver's avatar
    tcarver committed
          final Enumeration<String> contigEnum = contig_ranges.keys();
    
          while (contigEnum.hasMoreElements()) {
    
    tcarver's avatar
    tcarver committed
            final String key = contigEnum.nextElement();
            final Range r = contig_ranges.get(key);
    
            if (r.getStart() > start)
    
    tcarver's avatar
    tcarver committed
              continue;
    
            if (r.getEnd() > start)
    
    tcarver's avatar
    tcarver committed
              return key;
    
        } else {
          try {
            seqname = ((GFFStreamFeature) (getEntry().getAllFeatures().elementAt(0)))
                .getGffSeqName();
          } catch (Exception e) {
    
        if (seqname == null)
    
    tcarver's avatar
    tcarver committed
          seqname = "gff_seqname";
        return seqname;
    
    tjc's avatar
    tjc committed
      /**
    
       * Return a String containing the qualifiers of this feature in a form
       * suitable for using as the last field of a GFF line. The codon_start
       * attribute is not included since GFF has a frame field. gff_seqname,
       * gff_source and score aren't included since they have corresponding fields.
    
    tjc's avatar
    tjc committed
       **/
    
      private String unParseAttributes(final String myId) {
        //final StringBuffer buffer = new StringBuffer();
    
        final QualifierVector qualifiers = getQualifiers();
    
        GFF3AttributeBuilder abuf = new GFF3AttributeBuilder();
        prepareProcessors(abuf);
        
        for (String attr : attrs_to_filter) {
          abuf.ignore(attr);
        }
    
    tjc's avatar
    tjc committed
    
    
        final String names[] = { "ID", "Name", "Alias", "Parent", "Derives_from",
            "Target", "Gap", "Note", "Dbxref", "Ontology_term", "Start_range",
            "End_range", "Is_circular" };
    
    tjc's avatar
    tjc committed
        final int names_length = names.length;
    
    tjc's avatar
    tjc committed
    
    
        // add ID attribute
        if (myId != null) {
         abuf.add("ID", myId);
    
    tjc's avatar
    tjc committed
        }
    
        
        // build reserved attributes
        for (int i = 1; i < names_length; i++) {
    
    tcarver's avatar
    tcarver committed
          Qualifier this_qualifier = qualifiers.getQualifierByName(names[i]);
    
          
          if (this_qualifier == null)
    
    tjc's avatar
    tjc committed
            continue;
    
    
          abuf.add(this_qualifier.getName(), this_qualifier.getValues());
    
    tjc's avatar
    tjc committed
        }
    
        // build remaining attributes
    
    tjc's avatar
    tjc committed
        boolean lname;
    
        for (Qualifier this_qualifier : qualifiers) {
    
    tjc's avatar
    tjc committed
          lname = false;
    
          // skip reserved names
          for (int j = 0; j < names_length; j++)
            if (this_qualifier.getName().equals(names[j]))
              lname = true;
          if (lname)
    
          // skip internal qualifiers
          if ((this_qualifier.getName().equals("private") && System
              .getProperty("noprivate") != null)
              || (this_qualifier.getName().equals("history") && System
                  .getProperty("nohistory") != null))
    
            continue;
    
          abuf.add(this_qualifier.getName(), this_qualifier.getValues());
    
    tjc's avatar
    tjc committed
        }
    
    
        return abuf.toString();
    
    tjc's avatar
    tjc committed
      }
    
      /**
       * Register on the given builder the name mappings, clones, glue strings and
       * value aggregators that are applied to qualifiers when they are written
       * out as GFF3 attributes.
       *
       * @param abuf the attribute builder to configure
       */
      void prepareProcessors(GFF3AttributeBuilder abuf) {
        // product: keep the text following "term=" (minus the final character of
        // the value), encode it and join the values with ","
        GFF3AttributeAggregator productProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                final String value = values.elementAt(value_index);
                final String this_value;
                int index = value.indexOf("term=");
                // strip off the 'term=' etc
                if (index > -1)
                  this_value = GFF3Encoder.encode(value.substring(index + 5,
                      value.length() - 1));
                else
                  this_value = GFF3Encoder.encode(value);
                if (value_index > 0) {
                  buffer.append(",");
                }
                buffer.append(this_value);
              }
            }
            return buffer.toString();
          }
        };

        // EC_number: prefix every encoded value with "EC:" and join with ","
        GFF3AttributeAggregator ecProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                final String this_value = "EC:"
                    + GFF3Encoder.encode(values.elementAt(value_index));
                if (value_index > 0) {
                  buffer.append(",");
                }
                buffer.append(this_value);
              }
            }
            return buffer.toString();
          }
        };

        // previous_systematic_id: cut the value off before ";current=", encode
        // and join with ","
        GFF3AttributeAggregator psysIDProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                final String value = values.elementAt(value_index);
                final String this_value;
                int index = value.indexOf(";current=");
                // NOTE(review): "index - 1" also drops the character directly
                // in front of ";current=" (and throws if the value starts with
                // ";current="); left unchanged - confirm whether this is intended.
                if (index > -1)
                  this_value = GFF3Encoder.encode(value.substring(0, index - 1));
                else
                  this_value = GFF3Encoder.encode(value);
                if (value_index > 0) {
                  buffer.append(",");
                }
                buffer.append(this_value);
              }
            }
            return buffer.toString();
          }
        };

        // class: keep only the text in front of "::", encode and join with ","
        GFF3AttributeAggregator classProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                final String value = values.elementAt(value_index);
                final String this_value;
                int index = value.indexOf("::");
                if (index > -1)
                  this_value = GFF3Encoder.encode(value.substring(0, index));
                else
                  this_value = GFF3Encoder.encode(value);
                if (value_index > 0) {
                  buffer.append(",");
                }
                buffer.append(this_value);
              }
            }
            return buffer.toString();
          }
        };

        // Start_range/End_range: join the values with "," without encoding them
        GFF3AttributeAggregator startEndRangeProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                if (value_index > 0) {
                  buffer.append(",");
                }
                buffer.append(values.elementAt(value_index));
              }
            }
            return buffer.toString();
          }
        };

        // Ontology_term: emit the text between "GOid=" and ";term=" of every
        // value that contains both markers; other values are left out
        GFF3AttributeAggregator goProc = new GFF3AttributeAggregator() {
          @Override
          public String process(StringVector values) {
            StringBuilder buffer = new StringBuilder();
            if (values != null && values.size() > 0) {
              boolean appended = false;
              for (int value_index = 0; value_index < values.size(); ++value_index) {
                final String value = values.elementAt(value_index);
                int goindex = value.indexOf("GOid=");
                int termindex = value.indexOf(";term=");
                if (goindex > -1 && termindex > -1) {
                  // write the separator in front of every emitted item but the
                  // first, so that skipped values never leave a dangling ","
                  if (appended)
                    buffer.append(",");
                  buffer.append(GFF3Encoder.encode(value.substring(goindex + 5,
                      termindex)));
                  appended = true;
                }
              }
            }
            return buffer.toString();
          }
        };

        // map GO -> full_GO
        abuf.setMapping("GO", "full_GO");
        abuf.setGlue("full_GO", ",");

        // merge curation and comment
        abuf.setMapping("curation", "comment");

        // also put GOs in Ontology_term
        abuf.setClone("full_GO", "Ontology_term");
        abuf.setAggregator("Ontology_term", goProc);
        abuf.setGlue("Ontology_term", ",");

        // class
        abuf.setAggregator("class", classProc);

        // EC numbers go into Dbxref
        abuf.setMapping("EC_number", "Dbxref");
        abuf.setAggregator("EC_number", ecProc);
        abuf.setGlue("Dbxref", ",");

        // start/end ranges
        abuf.setAggregator("Start_range", startEndRangeProc);
        abuf.setAggregator("End_range", startEndRangeProc);

        // previous_systematic_id
        abuf.setAggregator("previous_systematic_id", psysIDProc);

        // product
        abuf.setAggregator("product", productProc);
      }
    
      /**
       * Get the translation qualifier string for polypeptide features.
    
      private String getTranslation() {
        if (!getKey().getKeyString().equals("polypeptide"))
          return null;
        if (chadoGene != null) {
          if (getUserData() == null)
    
            new uk.ac.sanger.artemis.Feature(this);
    
          // the above line constructs the appropriate userData within this current
          // GFFStreamFeature object,
    
          // which is required by the following GeneUtils.deriveResidues()
    
          String residues = GeneUtils.deriveResidues(this);
          if (residues != null)
    
            return "translation=" + residues;
    
      /**
       * Parse the given String as ACeDB format attributes. Adapted from code by
       * Matthew Pocock for the BioJava project.
       * 
       * Modified for gff-version 3.
       * 
       * @param att_val_list the ';' separated list of attributes to parse
       * @return Return a Hashtable. Each key is an attribute name and each value of
       *         the Hashtable is a StringVector containing the attribute values. If
       *         the attribute has no value then the Hashtable value will be a zero
       *         length vector.
       **/
    
      private Hashtable<String, StringVector> parseAttributes(
          final String att_val_list) {
    
    tcarver's avatar
    tcarver committed
        final Hashtable<String, StringVector> attr = new Hashtable<String, StringVector>();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        int ind_start = 0;
        int ind_end;
    
        while ((ind_end = att_val_list.indexOf(";", ind_start)) > -1
            || ind_start < att_val_list.length()) {
          if (ind_end < 0)
    
    tjc's avatar
    tjc committed
            ind_end = att_val_list.length();
    
    
          final String this_token = GFF3Encoder.decode(att_val_list.substring(ind_start,
              ind_end).trim());
          ind_start = ind_end + 1;
    
    tjc's avatar
    tjc committed
    
    
          int index_of_first_space = this_token.indexOf(" ");
    
    tcarver's avatar
    tcarver committed
          final String att_name;
    
          StringVector att_values = new StringVector();
    
          if (this_token.indexOf("=") > -1
              && (this_token.indexOf("=") < index_of_first_space || index_of_first_space == -1)) {
    
            index_of_first_space = this_token.indexOf("=");
            att_name = this_token.substring(0, index_of_first_space);
    
            att_values.add(this_token.substring(index_of_first_space + 1).trim());
          } else if (index_of_first_space == -1)
    
    tjc's avatar
    tjc committed
            att_name = this_token;
    
          else {
    
            att_name = this_token.substring(0, index_of_first_space);
    
    tjc's avatar
    tjc committed
    
    
            String rest_of_token = this_token.substring(index_of_first_space + 1)
                .trim();
    
    tjc's avatar
    tjc committed
    
    
            while (rest_of_token.length() > 0) {
              if (rest_of_token.startsWith("\"")) {
    
    tjc's avatar
    tjc committed
                int quote_index = 0;
    
                do {
    
    tjc's avatar
    tjc committed
                  quote_index++;
    
                  quote_index = rest_of_token.indexOf("\"", quote_index);