Skip to content
Snippets Groups Projects
Commit dc1add71 authored by tjc's avatar tjc
Browse files

format

git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@2131 ee4ac58c-ac51-4696-9907-e4b3aa274f04
parent 901d319f
Branches
Tags
No related merge requests found
......@@ -20,7 +20,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/AlignMatch.java,v 1.2 2004-10-04 10:00:34 tjc Exp $
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/AlignMatch.java,v 1.3 2004-12-14 10:41:42 tjc Exp $
*/
package uk.ac.sanger.artemis;
......@@ -31,7 +31,7 @@ import uk.ac.sanger.artemis.io.Range;
* Each object of this class represents a single match from an alignment.
*
* @author Kim Rutherford
* @version $Id: AlignMatch.java,v 1.2 2004-10-04 10:00:34 tjc Exp $
* @version $Id: AlignMatch.java,v 1.3 2004-12-14 10:41:42 tjc Exp $
**/
public class AlignMatch
......@@ -80,7 +80,6 @@ public class AlignMatch
getSubjectSequenceEnd());
}
public int getLength()
{
return match_length;
......@@ -177,5 +176,4 @@ public class AlignMatch
else
return false;
}
}
......@@ -20,7 +20,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/ComparisonData.java,v 1.1 2004-06-09 09:44:15 tjc Exp $
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/ComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
*/
package uk.ac.sanger.artemis;
......@@ -35,10 +35,11 @@ import uk.ac.sanger.artemis.sequence.*;
* for the alignment of two sequences.
*
* @author Kim Rutherford
* @version $Id: ComparisonData.java,v 1.1 2004-06-09 09:44:15 tjc Exp $
* @version $Id: ComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
**/
public interface ComparisonData {
public interface ComparisonData
{
/**
* Return an array containing all the AlignMatch objects for this
* comparison.
......@@ -50,8 +51,8 @@ public interface ComparisonData {
* first_seq_range on the first sequence or second_seq_range on the second
* sequence.
**/
public AlignMatch [] getMatchesInRange (final Range first_seq_range,
final Range second_seq_range);
//public AlignMatch[] getMatchesInRange(final Range first_seq_range,
// final Range second_seq_range);
/**
* If this object contains valid matches for a comparison between
......
......@@ -20,7 +20,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/SimpleComparisonData.java,v 1.1 2004-06-09 09:45:07 tjc Exp $
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/SimpleComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
*/
package uk.ac.sanger.artemis;
......@@ -41,43 +41,56 @@ import java.util.Hashtable;
* objects. In particular it has methods for managing AlignMatch objects.
*
* @author Kim Rutherford <kmr@sanger.ac.uk>
* @version $Id: SimpleComparisonData.java,v 1.1 2004-06-09 09:45:07 tjc Exp $
* @version $Id: SimpleComparisonData.java,v 1.2 2004-12-14 10:41:42 tjc Exp $
**/
abstract class SimpleComparisonData implements ComparisonData {
abstract class SimpleComparisonData implements ComparisonData
{
/** array of matches created by the constructor */
private AlignMatch [] matches;
/** array is used as a buffer */
private AlignMatch [] match_buffer;
/** Set by the constructor and returned by getMaximumScore() */
private int max_score = -1;
/** Set by the constructor and returned by getMinimumScore() */
private int min_score = 999999999;
/** Set by setMatches() to be the highest base we see in the subject */
private int subject_sequence_max_base = -1;
/** Set by setMatches() to be the highest base we see in the query */
private int query_sequence_max_base = -1;
/**
* Create a new SimpleComparisonData by reading from the given
* LinePushBackReader.
**/
public SimpleComparisonData(final LinePushBackReader stream)
throws IOException {
throws IOException
{
final Vector align_match_vector = new Vector();
while (true) {
final String line = stream.readLine ();
if (line == null) {
break;
}
String line;
if (line.trim ().length () == 0) {
while( (line = stream.readLine()) != null )
{
if(line.trim().length() == 0)
continue;
}
final AlignMatch new_match = makeMatchFromString(line);
if (new_match == null) {
// hit a blank line or a comment - loop again
} else {
// not a blank line or a comment
if(new_match != null)
align_match_vector.addElement(new_match);
}
}
final AlignMatch[] matches = new AlignMatch[align_match_vector.size()];
for (int i = 0 ; i < matches.length ; ++i) {
for(int i = 0; i < matches.length; ++i)
matches[i] = (AlignMatch)align_match_vector.elementAt(i);
}
setMatches(matches);
}
......@@ -85,87 +98,19 @@ abstract class SimpleComparisonData implements ComparisonData {
/**
* Create a new, empty instance of SimpleComparisonData.
**/
protected SimpleComparisonData () {
protected SimpleComparisonData()
{
}
/**
* Return an array containing all the AlignMatch objects for this
* comparison.
**/
public AlignMatch [] getMatches () {
public AlignMatch[] getMatches()
{
return matches;
}
/**
 *  Return all the AlignMatch objects in this comparison which overlap
 *  subject_seq_range on the subject sequence or query_seq_range on the query
 *  sequence.  Each match appears at most once in the result.
 *
 *  This method reads subject_sequence_buckets, query_sequence_buckets and
 *  spare_buckets, which are filled by makeBuckets (), and uses match_buffer
 *  (sized by setMatches ()) as scratch space.
 *  NOTE(review): nothing visible here calls makeBuckets () - confirm that
 *  the buckets are built before this method is used.
 *
 *  @param subject_seq_range The range to test on the subject sequence.
 *  @param query_seq_range The range to test on the query sequence.
 *  @return A newly allocated array containing the overlapping matches.
 **/
public AlignMatch [] getMatchesInRange (final Range subject_seq_range,
                                        final Range query_seq_range) {
  // a count of how many objects we have put into match_buffer so far.
  int match_buffer_count = 0;

  // matches that are too long to be bucketed are tested one by one
  for (int i = 0 ; i < spare_buckets.size () ; ++i) {
    final AlignMatch this_match = (AlignMatch) spare_buckets.elementAt (i);

    if (matchInRange (this_match, subject_seq_range, query_seq_range)) {
      match_buffer[match_buffer_count] = this_match;
      ++match_buffer_count;
    }
  }

  // used to make sure we don't return any duplicates
  final Hashtable table = new Hashtable (100);

  // Buckets are indexed by match start / BUCKET_SIZE.  A bucketed match is
  // at most BUCKET_SIZE long, so an overlapping match can start no earlier
  // than the bucket before the one holding the range start and no later
  // than the bucket holding the range end.  The end bucket is inclusive and
  // both indices are clamped to the bucket array.
  final int first_subject_bucket =
    Math.max (0, subject_seq_range.getStart () / BUCKET_SIZE - 1);
  final int last_subject_bucket =
    Math.min (subject_sequence_buckets.length - 1,
              subject_seq_range.getEnd () / BUCKET_SIZE);

  for (int bucket_index = first_subject_bucket ;
       bucket_index <= last_subject_bucket ;
       ++bucket_index) {
    for (int i = 0 ;
         i < subject_sequence_buckets[bucket_index].size () ;
         ++i) {
      final AlignMatch this_match =
        (AlignMatch) subject_sequence_buckets[bucket_index].elementAt (i);

      if (this_match.getSubjectSequenceRange ().overlaps (subject_seq_range)) {
        match_buffer[match_buffer_count] = this_match;
        ++match_buffer_count;
        // remember it so the query pass below does not add it again
        table.put (this_match, this_match);
      }
    }
  }

  final int first_query_bucket =
    Math.max (0, query_seq_range.getStart () / BUCKET_SIZE - 1);
  final int last_query_bucket =
    Math.min (query_sequence_buckets.length - 1,
              query_seq_range.getEnd () / BUCKET_SIZE);

  for (int bucket_index = first_query_bucket ;
       bucket_index <= last_query_bucket ;
       ++bucket_index) {
    for (int i = 0 ;
         i < query_sequence_buckets[bucket_index].size () ;
         ++i) {
      final AlignMatch this_match =
        (AlignMatch) query_sequence_buckets[bucket_index].elementAt (i);

      if (table.containsKey (this_match)) {
        // already collected by the subject pass
        continue;
      }

      if (this_match.getQuerySequenceRange ().overlaps (query_seq_range)) {
        match_buffer[match_buffer_count] = this_match;
        ++match_buffer_count;
      }
    }
  }

  final AlignMatch [] return_matches = new AlignMatch [match_buffer_count];

  System.arraycopy (match_buffer, 0,
                    return_matches, 0,
                    return_matches.length);

  return return_matches;
}
/**
* If this object contains only valid matches for a comparison between
* subject_sequence and query_sequence return null (subject_sequence is the
......@@ -179,23 +124,28 @@ abstract class SimpleComparisonData implements ComparisonData {
**/
public ComparisonData flipMatchesIfNeeded(final Bases subject_sequence,
final Bases query_sequence)
throws OutOfRangeException {
throws OutOfRangeException
{
final AlignMatch forward_error_match =
checkMatches(subject_sequence, query_sequence);
if (forward_error_match == null) {
if(forward_error_match == null)
return null;
} else {
else
{
final AlignMatch reverse_error_match =
checkMatches(query_sequence, subject_sequence);
if (reverse_error_match == null) {
if(reverse_error_match == null)
{
final SimpleComparisonData new_comparison_data =
getNewSimpleComparisonData();
final AlignMatch [] new_matches = new AlignMatch [matches.length];
int length = matches.length;
final AlignMatch[] new_matches = new AlignMatch[length];
for (int i = 0 ; i < matches.length ; ++i) {
for(int i = 0; i < length; ++i)
{
final AlignMatch this_match = matches[i];
final AlignMatch new_match =
......@@ -211,35 +161,38 @@ abstract class SimpleComparisonData implements ComparisonData {
new_comparison_data.setMatches(new_matches);
return new_comparison_data;
} else {
}
else
{
final String message;
if(forward_error_match.getSubjectSequenceStart() >
subject_sequence.getLength ()) {
subject_sequence.getLength())
message = "match goes off end of subject sequence: " +
forward_error_match.getSubjectSequenceStart();
} else {
else
{
if(forward_error_match.getSubjectSequenceEnd() >
subject_sequence.getLength ()) {
subject_sequence.getLength())
message = "match goes off end of subject sequence: " +
forward_error_match.getSubjectSequenceEnd();
} else {
else
{
if(forward_error_match.getQuerySequenceStart() >
query_sequence.getLength ()) {
query_sequence.getLength())
message = "match goes off end of query sequence: " +
forward_error_match.getQuerySequenceStart();
} else {
else
{
if(forward_error_match.getQuerySequenceEnd() >
query_sequence.getLength ()) {
query_sequence.getLength())
message = "match goes off end of query sequence: " +
forward_error_match.getQuerySequenceEnd();
} else {
else
throw new Error("internal error - unreachable code");
}
}
}
}
throw new OutOfRangeException(message);
}
}
......@@ -263,49 +216,38 @@ abstract class SimpleComparisonData implements ComparisonData {
* invalid AlignMatch is returned otherwise.
**/
private AlignMatch checkMatches(final Bases subject_sequence,
final Bases query_sequence) {
for (int i = 0 ; i < matches.length ; ++i) {
final Bases query_sequence)
{
int length = matches.length;
for(int i = 0; i < length; ++i)
{
final AlignMatch match = matches[i];
if(match.getSubjectSequenceStart() > subject_sequence.getLength() ||
match.getSubjectSequenceEnd () > subject_sequence.getLength ()) {
match.getSubjectSequenceEnd() > subject_sequence.getLength())
return match;
}
if(match.getQuerySequenceStart() > query_sequence.getLength() ||
match.getQuerySequenceEnd () > query_sequence.getLength ()) {
match.getQuerySequenceEnd() > query_sequence.getLength())
return match;
}
}
return null;
}
/**
 *  Test a single AlignMatch against a pair of ranges.
 *  @param match The AlignMatch to test.
 *  @param subject_seq_range The range to compare with the subject sequence
 *    range of the match.
 *  @param query_seq_range The range to compare with the query sequence
 *    range of the match.
 *  @return true if and only if the match overlaps subject_seq_range on the
 *    subject sequence or query_seq_range on the query sequence.
 **/
private boolean matchInRange (final AlignMatch match,
                              final Range subject_seq_range,
                              final Range query_seq_range) {
  final boolean hits_subject =
    match.getSubjectSequenceRange ().overlaps (subject_seq_range);

  // the query range is only consulted when the subject range does not hit
  return hits_subject ||
         match.getQuerySequenceRange ().overlaps (query_seq_range);
}
/**
* Set the array of AlignMatch objects.
**/
protected void setMatches (final AlignMatch [] matches) {
protected void setMatches(final AlignMatch[] matches)
{
this.matches = matches;
match_buffer = new AlignMatch [matches.length];
int length = matches.length;
match_buffer = new AlignMatch[length];
for (int i = 0 ; i < matches.length ; ++i) {
for(int i = 0 ; i < length ; ++i)
{
final AlignMatch this_match = matches[i];
final int score = this_match.getScore();
......@@ -316,54 +258,13 @@ abstract class SimpleComparisonData implements ComparisonData {
final int this_match_query_sequence_end =
this_match.getQuerySequenceEnd();
if (this_match_subject_sequence_end > subject_sequence_max_base) {
if(this_match_subject_sequence_end > subject_sequence_max_base)
subject_sequence_max_base = this_match_subject_sequence_end;
}
if (this_match_query_sequence_end > query_sequence_max_base) {
if(this_match_query_sequence_end > query_sequence_max_base)
query_sequence_max_base = this_match_query_sequence_end;
}
}
}
/**
* The number of bases per bucket.
**/
final private int BUCKET_SIZE = 1000;
/**
 *  Create subject_sequence_buckets, query_sequence_buckets and
 *  spare_buckets.  A match that covers more than BUCKET_SIZE bases in
 *  either sequence is added to spare_buckets; every other match is added to
 *  the subject bucket and the query bucket at index
 *  (match start / BUCKET_SIZE).
 **/
private void makeBuckets () {
  subject_sequence_buckets =
    makeEmptyBuckets (subject_sequence_max_base / BUCKET_SIZE + 1);
  query_sequence_buckets =
    makeEmptyBuckets (query_sequence_max_base / BUCKET_SIZE + 1);

  for (int i = 0 ; i < matches.length ; ++i) {
    final AlignMatch match = matches[i];

    if (match.getSubjectSequenceRange ().getCount () > BUCKET_SIZE ||
        match.getQuerySequenceRange ().getCount () > BUCKET_SIZE) {
      spare_buckets.addElement (match);
    } else {
      final int match_subject_sequence_start =
        match.getSubjectSequenceStart ();
      final int match_query_sequence_start =
        match.getQuerySequenceStart ();
      final int subject_buckets_index =
        match_subject_sequence_start / BUCKET_SIZE;
      subject_sequence_buckets[subject_buckets_index].addElement (match);
      final int query_buckets_index =
        match_query_sequence_start / BUCKET_SIZE;
      query_sequence_buckets[query_buckets_index].addElement (match);
    }
  }
}

/**
 *  Return an array of bucket_count empty Vector objects.  A freshly created
 *  array holds only null references, so each element must be given its own
 *  Vector before addElement () can be called on it.
 **/
private static Vector [] makeEmptyBuckets (final int bucket_count) {
  final Vector [] buckets = new Vector [bucket_count];

  for (int i = 0 ; i < bucket_count ; ++i) {
    buckets[i] = new Vector ();
  }

  return buckets;
}
/**
* Make and return a new AlignMatch.
......@@ -373,20 +274,24 @@ abstract class SimpleComparisonData implements ComparisonData {
int query_sequence_start,
int query_sequence_end,
final int score,
final int percent_id) {
try {
final int percent_id)
{
try
{
// true if and only if the query hits the reverse complement of the
// subject
boolean rev_match = false;
if (subject_sequence_end < subject_sequence_start) {
if(subject_sequence_end < subject_sequence_start)
{
final int tmp = subject_sequence_start;
subject_sequence_start = subject_sequence_end;
subject_sequence_end = tmp;
rev_match = !rev_match;
}
if (query_sequence_end < query_sequence_start) {
if(query_sequence_end < query_sequence_start)
{
final int tmp = query_sequence_start;
query_sequence_start = query_sequence_end;
query_sequence_end = tmp;
......@@ -398,7 +303,9 @@ abstract class SimpleComparisonData implements ComparisonData {
new Range(query_sequence_start,
query_sequence_end),
rev_match, score, percent_id);
} catch (OutOfRangeException e) {
}
catch(OutOfRangeException e)
{
throw new Error("internal error - unexpected exception: " + e);
}
}
......@@ -406,31 +313,33 @@ abstract class SimpleComparisonData implements ComparisonData {
/**
* Set the values of min_score and max_score.
**/
private void setMinMaxScore () {
for (int i = 0 ; i < matches.length ; ++i) {
private void setMinMaxScore()
{
int length = matches.length;
for(int i = 0; i < length; ++i)
{
final AlignMatch this_match = matches[i];
final int score = this_match.getScore();
if (score > -1) {
if (score > max_score) {
if(score > -1)
{
if(score > max_score)
max_score = score;
}
if (score < min_score) {
if(score < min_score)
min_score = score;
}
}
}
}
/**
* Return the maximum score of all the AlignMatch objects in this object.
**/
public int getMaximumScore () {
if (max_score == -1) {
public int getMaximumScore()
{
if(max_score == -1)
setMinMaxScore();
}
return max_score;
}
......@@ -438,64 +347,12 @@ abstract class SimpleComparisonData implements ComparisonData {
/**
* Return the minimum score of all the AlignMatch objects in this object.
**/
public int getMinimumScore () {
if (max_score == -1) {
public int getMinimumScore()
{
if(max_score == -1)
setMinMaxScore();
}
return min_score;
}
/**
* This is the array of matches created by the constructor.
**/
private AlignMatch [] matches;
/**
* This array is used as a buffer by getMatchesInRange ().
**/
private AlignMatch [] match_buffer;
/**
* Set by the constructor and returned by getMaximumScore ().
**/
private int max_score = -1;
/**
* Set by the constructor and returned by getMinimumScore ().
**/
private int min_score = 999999999;
/**
* Set by setMatches () to be the highest base we see in the subject
* sequence.
**/
private int subject_sequence_max_base = -1;
/**
* Set by setMatches () to be the highest base we see in the query
* sequence.
**/
private int query_sequence_max_base = -1;
/**
* This array contains a Vector for each BUCKET_SIZE bases in the subject
* sequence. Each bucketed AlignMatch object is stored at index
* AlignMatch.getSubjectSequenceStart () / BUCKET_SIZE (see makeBuckets ()),
* so the first bucket holds matches that start at bases 1 to
* BUCKET_SIZE - 1, the second bucket holds matches that start at bases
* BUCKET_SIZE to BUCKET_SIZE * 2 - 1, etc.
**/
private Vector [] subject_sequence_buckets = null;
/**
* This array contains a Vector for each BUCKET_SIZE bases in the query
* sequence.
**/
private Vector [] query_sequence_buckets = null;
/**
* This Vector contains the AlignMatch objects where the match is bigger
* than BUCKET_SIZE in either of the sequences.
**/
private Vector spare_buckets = new Vector ();
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment