    /* SimpleComparisonData.java
     *
     * created: Wed May 17 2000
     *
     * This file is part of Artemis
     *
     * Copyright (C) 2000  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
    
     * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/SimpleComparisonData.java,v 1.3 2005-11-17 16:50:50 tjc Exp $
     */
    
    package uk.ac.sanger.artemis;
    
    import uk.ac.sanger.artemis.sequence.*;
    
    import uk.ac.sanger.artemis.io.Range;
    import uk.ac.sanger.artemis.util.OutOfRangeException;
    import uk.ac.sanger.artemis.util.LinePushBackReader;
    
    import java.io.*;
    import java.util.StringTokenizer;
    import java.util.Vector;
    import java.util.Hashtable;
    
    /**
     *  This class contains methods that are common to all ComparisonData
     *  objects.  In particular it has methods for managing AlignMatch objects.
     *
     *  @author Kim Rutherford <kmr@sanger.ac.uk>
    
     *  @version $Id: SimpleComparisonData.java,v 1.3 2005-11-17 16:50:50 tjc Exp $
     **/

    abstract class SimpleComparisonData implements ComparisonData 
    {
      /**
       *  The AlignMatch objects of this comparison.  Assigned by setMatches()
       *  and returned as-is by getMatches().  It has no initialiser, so it is
       *  null until setMatches() has been called (the no-argument constructor
       *  does not call it).
       **/
      private AlignMatch [] matches;
    
      /**
       *  Buffer array allocated by setMatches() with the same length as
       *  matches.  NOTE(review): nothing in this file reads it - confirm
       *  whether it is still needed.
       **/
      private AlignMatch [] match_buffer;
    
      /**
       *  Largest score seen by setMinMaxScore(); returned by getMaximumScore().
       *  The initial value -1 also acts as the "not yet computed" marker that
       *  getMaximumScore() and getMinimumScore() test before calling
       *  setMinMaxScore().
       **/
      private int max_score = -1;
    
      /**
       *  Smallest score seen by setMinMaxScore(); returned by
       *  getMinimumScore().  Starts at 999999999 so that any real score is
       *  lower; that value is what callers get if no match has a score
       *  greater than -1.
       **/
      private int min_score = 999999999;
    
      /**
       *  Set by setMatches() to be the highest subject end position among the
       *  matches it was given.  NOTE(review): not read anywhere in this file.
       **/
      private int subject_sequence_max_base = -1;
    
      /**
       *  Set by setMatches() to be the highest query end position among the
       *  matches it was given.  NOTE(review): not read anywhere in this file.
       **/
      private int query_sequence_max_base = -1;
    
    
      /**
       *  Create a new SimpleComparisonData by reading from the given
       *  LinePushBackReader until readLine() returns null.
       *
       *  Lines that are empty after trimming are skipped.  Every other line
       *  is handed to makeMatchFromString(); a null result is ignored, any
       *  other result is kept.  The collected matches are then installed with
       *  setMatches().  The reader is not closed here.
       *
       *  Note that makeMatchFromString() is implemented by the subclass and is
       *  called from this constructor, i.e. before the subclass constructor
       *  body has run.
       *
       *  @param stream The reader to take the match lines from.
       *  @exception IOException Thrown if reading fails or if
       *    makeMatchFromString() rejects a line.
       **/
      public SimpleComparisonData(final LinePushBackReader stream)
          throws IOException
      {
        final Vector parsed_matches = new Vector();

        for(String line = stream.readLine();
            line != null;
            line = stream.readLine())
        {
          final boolean is_blank = line.trim().length() == 0;

          if(!is_blank)
          {
            final AlignMatch parsed = makeMatchFromString(line);

            // null means the line did not describe a match
            if(parsed != null)
              parsed_matches.addElement(parsed);
          }
        }

        final AlignMatch[] match_array =
          new AlignMatch[parsed_matches.size()];
        parsed_matches.copyInto(match_array);

        setMatches(match_array);
      }
    
      /**
       *  Create a new, empty instance of SimpleComparisonData.
       **/
    
    tjc's avatar
    tjc committed
      protected SimpleComparisonData() 
      {
    
    tjc's avatar
    tjc committed
      }
    
      /**
       *  Return an array containing all the AlignMatch objects for this
       *  comparison.
       **/
    
    tjc's avatar
    tjc committed
      public AlignMatch[] getMatches() 
      {
    
    tjc's avatar
    tjc committed
        return matches;
      }
    
    
    tjc's avatar
    tjc committed
      /**
       *  If this object contains only valid matches for a comparison between
       *  subject_sequence and query_sequence return null (subject_sequence is the
       *  subject of the comparison query_sequence is the query).  If the
       *  comparison would be valid if the data for the ends of the matches were
       *  swapped, then return a copy of this object with all the matches flipped.
       *  (For now, valid means that none of the matches goes over the end of the
       *  sequence.)
       *  @exception OutOfRangeException Thrown if the data in this object is not
       *    valid for either orientation.
       **/
    
    tjc's avatar
    tjc committed
      public ComparisonData flipMatchesIfNeeded(final Bases subject_sequence,
                                                final Bases query_sequence)
          throws OutOfRangeException 
      {
    
    tjc's avatar
    tjc committed
        final AlignMatch forward_error_match =
    
    tjc's avatar
    tjc committed
          checkMatches(subject_sequence, query_sequence);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        if(forward_error_match == null) 
    
    tjc's avatar
    tjc committed
          return null;
    
    tjc's avatar
    tjc committed
        else 
        {
    
    tjc's avatar
    tjc committed
          final AlignMatch reverse_error_match =
    
    tjc's avatar
    tjc committed
            checkMatches(query_sequence, subject_sequence);
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(reverse_error_match == null)
          {
    
    tjc's avatar
    tjc committed
            final SimpleComparisonData new_comparison_data =
    
    tjc's avatar
    tjc committed
                                       getNewSimpleComparisonData();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            int length = matches.length;
            final AlignMatch[] new_matches = new AlignMatch[length];
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
            for(int i = 0; i < length; ++i)
            {
    
    tjc's avatar
    tjc committed
              final AlignMatch this_match = matches[i];
    
              final AlignMatch new_match =
    
    tjc's avatar
    tjc committed
                new AlignMatch(this_match.getQuerySequenceRange(),
                               this_match.getSubjectSequenceRange(),
                               this_match.isRevMatch(),
                               this_match.getScore(),
                               this_match.getPercentID());
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              new_matches[i] = new_match;
    
    tjc's avatar
    tjc committed
            }
    
    
    tjc's avatar
    tjc committed
            new_comparison_data.setMatches(new_matches);
    
    tjc's avatar
    tjc committed
    
            return new_comparison_data;
    
    tjc's avatar
    tjc committed
          } 
          else
          {
    
    tjc's avatar
    tjc committed
            final String message;
    
    
    tjc's avatar
    tjc committed
            if(forward_error_match.getSubjectSequenceStart() >
               subject_sequence.getLength()) 
    
    tjc's avatar
    tjc committed
              message = "match goes off end of subject sequence: " +
    
    tjc's avatar
    tjc committed
                        forward_error_match.getSubjectSequenceStart();
            else 
            {
              if(forward_error_match.getSubjectSequenceEnd() >
                 subject_sequence.getLength()) 
    
    tjc's avatar
    tjc committed
                message = "match goes off end of subject sequence: " +
    
    tjc's avatar
    tjc committed
                          forward_error_match.getSubjectSequenceEnd();
              else
              {
                if(forward_error_match.getQuerySequenceStart() >
                   query_sequence.getLength())
    
    tjc's avatar
    tjc committed
                  message = "match goes off end of query sequence: " +
    
    tjc's avatar
    tjc committed
                             forward_error_match.getQuerySequenceStart();
                else 
                {
                  if(forward_error_match.getQuerySequenceEnd() >
                     query_sequence.getLength())
    
    tjc's avatar
    tjc committed
                    message = "match goes off end of query sequence: " +
    
    tjc's avatar
    tjc committed
                               forward_error_match.getQuerySequenceEnd();
                  else 
                    throw new Error("internal error - unreachable code");
    
    tjc's avatar
    tjc committed
                }
              }
            }
    
    tjc's avatar
    tjc committed
            throw new OutOfRangeException(message);
    
    tjc's avatar
    tjc committed
          }
        }
      }
    
      /**
       *  Returns a new, empty instance of this type of object;
       **/
    
    tjc's avatar
    tjc committed
      abstract protected SimpleComparisonData getNewSimpleComparisonData();
    
    tjc's avatar
    tjc committed
    
      /**
       *  Make an AlignMatch object from the given String.  The String must be in
       *  a format appropriate for this object.
       **/
    
    tjc's avatar
    tjc committed
      abstract protected AlignMatch makeMatchFromString(final String line)
    
    tjc's avatar
    tjc committed
          throws IOException;
    
      /**
       *  Return null if and only if this object contains only valid matches for a
       *  comparison between subject_sequence and query_sequence.  The first
       *  invalid AlignMatch is returned otherwise.
       **/
    
    tjc's avatar
    tjc committed
      private AlignMatch checkMatches(final Bases subject_sequence,
                                      final Bases query_sequence) 
      {
        int length = matches.length;
        for(int i = 0; i < length; ++i) 
        {
    
    tjc's avatar
    tjc committed
          final AlignMatch match = matches[i];
    
    
    tjc's avatar
    tjc committed
          if(match.getSubjectSequenceStart() > subject_sequence.getLength() ||
             match.getSubjectSequenceEnd() > subject_sequence.getLength()) 
    
    tjc's avatar
    tjc committed
            return match;
    
    
    tjc's avatar
    tjc committed
          if(match.getQuerySequenceStart() > query_sequence.getLength() ||
             match.getQuerySequenceEnd() > query_sequence.getLength())
    
    tjc's avatar
    tjc committed
            return match;
        }
    
        return null;
      }
    
    
      /**
       *  Set the array of AlignMatch objects.
       **/
    
    tjc's avatar
    tjc committed
      protected void setMatches(final AlignMatch[] matches) 
      {
    
    tjc's avatar
    tjc committed
        this.matches = matches;
    
    
    tjc's avatar
    tjc committed
        int length = matches.length;
        match_buffer = new AlignMatch[length];
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
        for(int i = 0 ; i < length ; ++i) 
        {
    
    tjc's avatar
    tjc committed
          final AlignMatch this_match = matches[i];
    
    
    tjc's avatar
    tjc committed
          final int score = this_match.getScore();
    
    tjc's avatar
    tjc committed
    
          final int this_match_subject_sequence_end =
    
    tjc's avatar
    tjc committed
            this_match.getSubjectSequenceEnd();
    
    tjc's avatar
    tjc committed
    
          final int this_match_query_sequence_end =
    
    tjc's avatar
    tjc committed
            this_match.getQuerySequenceEnd();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(this_match_subject_sequence_end > subject_sequence_max_base) 
    
    tjc's avatar
    tjc committed
            subject_sequence_max_base = this_match_subject_sequence_end;
    
    
    tjc's avatar
    tjc committed
          if(this_match_query_sequence_end > query_sequence_max_base) 
    
    tjc's avatar
    tjc committed
            query_sequence_max_base = this_match_query_sequence_end;
        }
      }
    
      /**
       *  Make and return a new AlignMatch.
       **/
    
    tjc's avatar
    tjc committed
      static protected AlignMatch makeAlignMatch(int subject_sequence_start,
                                                 int subject_sequence_end,
                                                 int query_sequence_start,
                                                 int query_sequence_end,
                                                 final int score,
                                                 final int percent_id) 
      {
        try 
        {
    
    tjc's avatar
    tjc committed
          // true if and only if the query hits the reverse complement of the
          // subject
          boolean rev_match = false;
    
    
    tjc's avatar
    tjc committed
          if(subject_sequence_end < subject_sequence_start) 
          {
    
    tjc's avatar
    tjc committed
            final int tmp = subject_sequence_start;
            subject_sequence_start = subject_sequence_end;
            subject_sequence_end = tmp;
            rev_match = !rev_match;
          }
    
    
    tjc's avatar
    tjc committed
          if(query_sequence_end < query_sequence_start) 
          {
    
    tjc's avatar
    tjc committed
            final int tmp = query_sequence_start;
            query_sequence_start = query_sequence_end;
            query_sequence_end = tmp;
            rev_match = !rev_match;
          }
    
    
    tjc's avatar
    tjc committed
          return new AlignMatch(new Range(subject_sequence_start,
                                          subject_sequence_end),
                                new Range(query_sequence_start,
                                          query_sequence_end),
                                rev_match, score, percent_id);
        }
        catch(OutOfRangeException e) 
        {
          throw new Error("internal error - unexpected exception: " + e);
    
    tjc's avatar
    tjc committed
        }
      }
    
      /**
       *  Set the values of min_score and max_score.
       **/
    
    tjc's avatar
    tjc committed
      private void setMinMaxScore()
      {
        int length = matches.length;
        for(int i = 0; i < length; ++i) 
        {
    
    tjc's avatar
    tjc committed
          final AlignMatch this_match = matches[i];
    
    
    tjc's avatar
    tjc committed
          final int score = this_match.getScore();
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
          if(score > -1) 
          {
            if(score > max_score) 
    
    tjc's avatar
    tjc committed
              max_score = score;
    
    
    tjc's avatar
    tjc committed
            if(score < min_score)
    
    tjc's avatar
    tjc committed
              min_score = score;
          }
        }
      }
    
      /**
       *  Return the maximum score of all the AlignMatch objects in this object.
       **/
    
    tjc's avatar
    tjc committed
      public int getMaximumScore() 
      {
        if(max_score == -1)
          setMinMaxScore();
    
    tjc's avatar
    tjc committed
    
        return max_score;
      }
    
      /**
       *  Return the minimum score of all the AlignMatch objects in this object.
       **/
    
    tjc's avatar
    tjc committed
      public int getMinimumScore() 
      {
        if(max_score == -1) 
          setMinMaxScore();
    
    tjc's avatar
    tjc committed
    
        return min_score;
      }
    
    }