/* SimpleComparisonData.java
 *
 * created: Wed May 17 2000
 *
 * This file is part of Artemis
 *
 * Copyright (C) 2000  Genome Research Limited
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/SimpleComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
 */

package uk.ac.sanger.artemis;

import uk.ac.sanger.artemis.sequence.*;

import uk.ac.sanger.artemis.io.Range;
import uk.ac.sanger.artemis.util.OutOfRangeException;
import uk.ac.sanger.artemis.util.LinePushBackReader;

import java.io.*;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.Hashtable;

/**
 *  This class contains methods that are common to all ComparisonData
 *  objects.  In particular it has methods for managing AlignMatch objects.
 *
 *  @author Kim Rutherford <kmr@sanger.ac.uk>
 *  @version $Id: SimpleComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
 **/

abstract class SimpleComparisonData implements ComparisonData 
{
  /** array of matches created by the constructor */
  private AlignMatch [] matches;

  /** array is used as a buffer */
  private AlignMatch [] match_buffer;

  /** Set by the constructor and returned by getMaximumScore() */
  private int max_score = -1;

  /** Set by the constructor and returned by getMinimumScore() */
  private int min_score = 999999999;

  /** Set by setMatches() to be the highest base we see in the subject */
  private int subject_sequence_max_base = -1;

  /** Set by setMatches() to be the highest base we see in the query   */
  private int query_sequence_max_base = -1;

tjc's avatar
tjc committed
  /**
   *  Create a new SimpleComparisonData by reading from the given
   *  LinePushBackReader.
   **/
tjc's avatar
tjc committed
  public SimpleComparisonData(final LinePushBackReader stream)
      throws IOException 
  {
    final Vector align_match_vector = new Vector();
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    String line;
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    while( (line = stream.readLine()) != null )
    {
      if(line.trim().length() == 0) 
tjc's avatar
tjc committed
        continue;

tjc's avatar
tjc committed
      final AlignMatch new_match = makeMatchFromString(line);
tjc's avatar
tjc committed

tjc's avatar
tjc committed
      // not a blank line or a comment
      if(new_match != null) 
        align_match_vector.addElement(new_match);
tjc's avatar
tjc committed
    }

tjc's avatar
tjc committed
    final AlignMatch[] matches = new AlignMatch[align_match_vector.size()];
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    for(int i = 0; i < matches.length; ++i) 
      matches[i] = (AlignMatch)align_match_vector.elementAt(i);
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    setMatches(matches);
tjc's avatar
tjc committed
  }

  /**
   *  Create a new, empty instance of SimpleComparisonData.
   **/
tjc's avatar
tjc committed
  protected SimpleComparisonData() 
  {
tjc's avatar
tjc committed
  }

  /**
   *  Return an array containing all the AlignMatch objects for this
   *  comparison.
   **/
tjc's avatar
tjc committed
  public AlignMatch[] getMatches() 
  {
tjc's avatar
tjc committed
    return matches;
  }

  /**
   *  If this object contains only valid matches for a comparison between
   *  subject_sequence and query_sequence return null (subject_sequence is the
   *  subject of the comparison query_sequence is the query).  If the
   *  comparison would be valid if the data for the ends of the matches were
   *  swapped, then return a copy of this object with all the matches flipped.
   *  (For now, valid means that none of the matches goes over the end of the
   *  sequence.)
   *  @exception OutOfRangeException Thrown if the data in this object is not
   *    valid for either orientation.
   **/
tjc's avatar
tjc committed
  public ComparisonData flipMatchesIfNeeded(final Bases subject_sequence,
                                            final Bases query_sequence)
      throws OutOfRangeException 
  {
tjc's avatar
tjc committed
    final AlignMatch forward_error_match =
tjc's avatar
tjc committed
      checkMatches(subject_sequence, query_sequence);
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    if(forward_error_match == null) 
tjc's avatar
tjc committed
      return null;
tjc's avatar
tjc committed
    else 
    {
tjc's avatar
tjc committed
      final AlignMatch reverse_error_match =
tjc's avatar
tjc committed
        checkMatches(query_sequence, subject_sequence);
tjc's avatar
tjc committed

tjc's avatar
tjc committed
      if(reverse_error_match == null)
      {
tjc's avatar
tjc committed
        final SimpleComparisonData new_comparison_data =
tjc's avatar
tjc committed
                                   getNewSimpleComparisonData();
tjc's avatar
tjc committed

tjc's avatar
tjc committed
        int length = matches.length;
        final AlignMatch[] new_matches = new AlignMatch[length];
tjc's avatar
tjc committed

tjc's avatar
tjc committed
        for(int i = 0; i < length; ++i)
        {
tjc's avatar
tjc committed
          final AlignMatch this_match = matches[i];

          final AlignMatch new_match =
tjc's avatar
tjc committed
            new AlignMatch(this_match.getQuerySequenceRange(),
                           this_match.getSubjectSequenceRange(),
                           this_match.isRevMatch(),
                           this_match.getScore(),
                           this_match.getPercentID());
tjc's avatar
tjc committed

tjc's avatar
tjc committed
          new_matches[i] = new_match;
tjc's avatar
tjc committed
        }

tjc's avatar
tjc committed
        new_comparison_data.setMatches(new_matches);
tjc's avatar
tjc committed

        return new_comparison_data;
tjc's avatar
tjc committed
      } 
      else
      {
tjc's avatar
tjc committed
        final String message;

tjc's avatar
tjc committed
        if(forward_error_match.getSubjectSequenceStart() >
           subject_sequence.getLength()) 
tjc's avatar
tjc committed
          message = "match goes off end of subject sequence: " +
tjc's avatar
tjc committed
                    forward_error_match.getSubjectSequenceStart();
        else 
        {
          if(forward_error_match.getSubjectSequenceEnd() >
             subject_sequence.getLength()) 
tjc's avatar
tjc committed
            message = "match goes off end of subject sequence: " +
tjc's avatar
tjc committed
                      forward_error_match.getSubjectSequenceEnd();
          else
          {
            if(forward_error_match.getQuerySequenceStart() >
               query_sequence.getLength())
tjc's avatar
tjc committed
              message = "match goes off end of query sequence: " +
tjc's avatar
tjc committed
                         forward_error_match.getQuerySequenceStart();
            else 
            {
              if(forward_error_match.getQuerySequenceEnd() >
                 query_sequence.getLength())
tjc's avatar
tjc committed
                message = "match goes off end of query sequence: " +
tjc's avatar
tjc committed
                           forward_error_match.getQuerySequenceEnd();
              else 
                throw new Error("internal error - unreachable code");
tjc's avatar
tjc committed
            }
          }
        }
tjc's avatar
tjc committed
        throw new OutOfRangeException(message);
tjc's avatar
tjc committed
      }
    }
  }

  /**
   *  Returns a new, empty instance of this type of object;
   **/
tjc's avatar
tjc committed
  abstract protected SimpleComparisonData getNewSimpleComparisonData();
tjc's avatar
tjc committed

  /**
   *  Make an AlignMatch object from the given String.  The String must be in
   *  a format appropriate for this object.
   **/
tjc's avatar
tjc committed
  abstract protected AlignMatch makeMatchFromString(final String line)
tjc's avatar
tjc committed
      throws IOException;

  /**
   *  Return null if and only if this object contains only valid matches for a
   *  comparison between subject_sequence and query_sequence.  The first
   *  invalid AlignMatch is returned otherwise.
   **/
tjc's avatar
tjc committed
  private AlignMatch checkMatches(final Bases subject_sequence,
                                  final Bases query_sequence) 
  {
    int length = matches.length;
    for(int i = 0; i < length; ++i) 
    {
tjc's avatar
tjc committed
      final AlignMatch match = matches[i];

tjc's avatar
tjc committed
      if(match.getSubjectSequenceStart() > subject_sequence.getLength() ||
         match.getSubjectSequenceEnd() > subject_sequence.getLength()) 
tjc's avatar
tjc committed
        return match;

tjc's avatar
tjc committed
      if(match.getQuerySequenceStart() > query_sequence.getLength() ||
         match.getQuerySequenceEnd() > query_sequence.getLength())
tjc's avatar
tjc committed
        return match;
    }

    return null;
  }


  /**
   *  Set the array of AlignMatch objects.
   **/
tjc's avatar
tjc committed
  protected void setMatches(final AlignMatch[] matches) 
  {
tjc's avatar
tjc committed
    this.matches = matches;

tjc's avatar
tjc committed
    int length = matches.length;
    match_buffer = new AlignMatch[length];
tjc's avatar
tjc committed

tjc's avatar
tjc committed
    for(int i = 0 ; i < length ; ++i) 
    {
tjc's avatar
tjc committed
      final AlignMatch this_match = matches[i];

tjc's avatar
tjc committed
      final int score = this_match.getScore();
tjc's avatar
tjc committed

      final int this_match_subject_sequence_end =
tjc's avatar
tjc committed
        this_match.getSubjectSequenceEnd();
tjc's avatar
tjc committed

      final int this_match_query_sequence_end =
tjc's avatar
tjc committed
        this_match.getQuerySequenceEnd();
tjc's avatar
tjc committed

tjc's avatar
tjc committed
      if(this_match_subject_sequence_end > subject_sequence_max_base) 
tjc's avatar
tjc committed
        subject_sequence_max_base = this_match_subject_sequence_end;

tjc's avatar
tjc committed
      if(this_match_query_sequence_end > query_sequence_max_base) 
tjc's avatar
tjc committed
        query_sequence_max_base = this_match_query_sequence_end;
    }
  }

  /**
   *  Make and return a new AlignMatch.
   **/
tjc's avatar
tjc committed
  static protected AlignMatch makeAlignMatch(int subject_sequence_start,
                                             int subject_sequence_end,
                                             int query_sequence_start,
                                             int query_sequence_end,
                                             final int score,
                                             final int percent_id) 
  {
    try 
    {
tjc's avatar
tjc committed
      // true if and only if the query hits the reverse complement of the
      // subject
      boolean rev_match = false;

tjc's avatar
tjc committed
      if(subject_sequence_end < subject_sequence_start) 
      {
tjc's avatar
tjc committed
        final int tmp = subject_sequence_start;
        subject_sequence_start = subject_sequence_end;
        subject_sequence_end = tmp;
        rev_match = !rev_match;
      }

tjc's avatar
tjc committed
      if(query_sequence_end < query_sequence_start) 
      {
tjc's avatar
tjc committed
        final int tmp = query_sequence_start;
        query_sequence_start = query_sequence_end;
        query_sequence_end = tmp;
        rev_match = !rev_match;
      }

tjc's avatar
tjc committed
      return new AlignMatch(new Range(subject_sequence_start,
                                      subject_sequence_end),
                            new Range(query_sequence_start,
                                      query_sequence_end),
                            rev_match, score, percent_id);
    }
    catch(OutOfRangeException e) 
    {
      throw new Error("internal error - unexpected exception: " + e);
tjc's avatar
tjc committed
    }
  }

  /**
   *  Set the values of min_score and max_score.
   **/
tjc's avatar
tjc committed
  private void setMinMaxScore()
  {
    int length = matches.length;
    for(int i = 0; i < length; ++i) 
    {
tjc's avatar
tjc committed
      final AlignMatch this_match = matches[i];

tjc's avatar
tjc committed
      final int score = this_match.getScore();
tjc's avatar
tjc committed

tjc's avatar
tjc committed
      if(score > -1) 
      {
        if(score > max_score) 
tjc's avatar
tjc committed
          max_score = score;

tjc's avatar
tjc committed
        if(score < min_score)
tjc's avatar
tjc committed
          min_score = score;
      }
    }
  }

  /**
   *  Return the maximum score of all the AlignMatch objects in this object.
   **/
tjc's avatar
tjc committed
  public int getMaximumScore() 
  {
    if(max_score == -1)
      setMinMaxScore();
tjc's avatar
tjc committed

    return max_score;
  }

  /**
   *  Return the minimum score of all the AlignMatch objects in this object.
   **/
tjc's avatar
tjc committed
  public int getMinimumScore() 
  {
    if(max_score == -1) 
      setMinMaxScore();
tjc's avatar
tjc committed

    return min_score;
  }

}