Skip to content
Snippets Groups Projects
Commit 597cf405 authored by tjc
Browse files

introduce a new user data format with the base position in the first column

git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@11188 ee4ac58c-ac51-4696-9907-e4b3aa274f04
parent 46fc638a
No related branches found
No related tags found
No related merge requests found
......@@ -20,7 +20,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/plot/UserDataAlgorithm.java,v 1.7 2009-03-16 14:09:51 tjc Exp $
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/plot/UserDataAlgorithm.java,v 1.8 2009-06-24 14:42:33 tjc Exp $
*/
package uk.ac.sanger.artemis.plot;
......@@ -31,6 +31,7 @@ import uk.ac.sanger.artemis.util.*;
import uk.ac.sanger.artemis.io.ReadFormatException;
import java.io.*;
import java.util.HashMap;
import java.util.regex.Pattern;
/**
......@@ -40,42 +41,53 @@ import java.util.regex.Pattern;
* set in the constructor.
*
* @author Kim Rutherford <kmr@sanger.ac.uk>
* @version $Id: UserDataAlgorithm.java,v 1.7 2009-03-16 14:09:51 tjc Exp $
* @version $Id: UserDataAlgorithm.java,v 1.8 2009-06-24 14:42:33 tjc Exp $
**/
public class UserDataAlgorithm extends BaseAlgorithm
{
/** File format with one line of values per base. */
private static final int BASE_PER_LINE_FORMAT = 1;

/** File format where the first column of each line gives the base position. */
private static final int BASE_SPECIFIED_FORMAT = 2;

/**
 * The data read by the constructor - for BASE_PER_LINE_FORMAT.
 * Indexed as [base - 1][column]; left null for BASE_SPECIFIED_FORMAT.
 **/
private float data[][] = null;

/**
 * The data read by the constructor - for BASE_SPECIFIED_FORMAT.
 * Maps a base position to the values found on that base's line.
 **/
private HashMap<Integer, float[]> dataMap;

/**
 * The maximum value in the data.  Seeded with the most negative finite
 * float: Float.MIN_VALUE is the smallest POSITIVE float, so using it as
 * the seed would report a wrong maximum when every value is <= 0.
 **/
private float data_max = -Float.MAX_VALUE;

/** The minimum value in the data. */
private float data_min = Float.MAX_VALUE;

/**
 * Default window size.  For BASE_SPECIFIED_FORMAT, readData () replaces
 * this with the smallest positive gap it sees between base positions.
 **/
private int default_window_size = 3;

/** The average calculated by readData (). */
private float average_value = 0;

/**
 * The number of whitespace separated columns on the first data line.
 * For BASE_SPECIFIED_FORMAT this count includes the base position column.
 **/
private int number_of_values;

/** When true, each value v is stored as log (v + 1) while reading. */
private boolean logTransform;

/** Format type for this instance */
private int FORMAT = BASE_PER_LINE_FORMAT;
/**
* Create a new UserDataAlgorithm object. This reads a file
* which can be one of two types of formats:
* a. one line of values per base.
* b. the first column specifies the base position with
* subsequent columns being values.
* @param strand The strand to do the calculation on.
* @param document The Document to read the data from.
* @param logTransform true if the log transformation is to be
* shown.
**/
public UserDataAlgorithm (final Strand strand, final Document document,
final boolean logTransform)
......@@ -88,15 +100,28 @@ public class UserDataAlgorithm extends BaseAlgorithm
LinePushBackReader pushback_reader = new LinePushBackReader (document_reader);
final String first_line = pushback_reader.readLine ();
final StringVector tokens = StringVector.getStrings (first_line, " ");
String first_line = pushback_reader.readLine ();
if(first_line.startsWith("#"))
{
FORMAT = BASE_SPECIFIED_FORMAT;
first_line = pushback_reader.readLine ().trim();
while(first_line.equals("") || first_line.equals("#"))
first_line = pushback_reader.readLine ().trim();
}
else
FORMAT = BASE_PER_LINE_FORMAT;
if (tokens.size () < 1)
final Pattern patt = Pattern.compile("\\s+");
String tokens[] = patt.split(first_line);
if (tokens.length < 1)
throw new ReadFormatException ("unknown file type");
this.number_of_values = tokens.size ();
this.number_of_values = tokens.length;
pushback_reader.pushBack (first_line);
data = new float [strand.getSequenceLength ()][tokens.size ()];
if(FORMAT == BASE_PER_LINE_FORMAT)
data = new float [strand.getSequenceLength ()][tokens.length];
readData (pushback_reader);
pushback_reader.close();
......@@ -110,6 +135,8 @@ public class UserDataAlgorithm extends BaseAlgorithm
{
String line = null;
int count = 0;
int countAll = 0;
int estimate_window_size = Integer.MAX_VALUE;
final int seqLength = getStrand ().getSequenceLength ();
final Pattern patt = Pattern.compile("\\s+");
......@@ -118,46 +145,71 @@ public class UserDataAlgorithm extends BaseAlgorithm
if (count >= seqLength)
throw new ReadFormatException ("too many values in input file");
//final StringVector tokens = StringVector.getStrings (line, " ");
String tokens[] = patt.split(line);
if (FORMAT == BASE_PER_LINE_FORMAT && tokens.length != data[0].length)
throw new ReadFormatException ("line has the wrong number of fields:\n"+line);
String tokens[] = patt.split(line); //line.split("\\s+");
if (tokens.length == data[0].length)
{
int base = 0;
float line_data[] = new float[tokens.length-1];
for (int i = 0 ; i < tokens.length ; ++i)
{
try
{
float value = Float.parseFloat(tokens[i]);
//Float.valueOf ((String)tokens.elementAt (i)).floatValue ();
if(FORMAT == BASE_SPECIFIED_FORMAT && i == 0)
{
int last_base = base;
base = (int) Float.parseFloat(tokens[i]);
if(base > seqLength)
throw new ReadFormatException (
"a base position is greater than the sequence length:\n"+line);
if((base - last_base) < estimate_window_size &&
(base - last_base) > 0)
estimate_window_size = base - last_base;
if(dataMap == null)
dataMap = new HashMap<Integer, float[]>();
continue;
}
float value = Float.parseFloat(tokens[i]);
if(logTransform)
value = (float) Math.log(value+1);
if (value > data_max)
data_max = value;
if (value < data_min)
data_min = value;
if(FORMAT == BASE_PER_LINE_FORMAT)
data[count][i] = value;
else
{
line_data[i-1] = value;
countAll++;
}
average_value += value;
}
catch (NumberFormatException e)
{
throw new ReadFormatException ("cannot understand this number: " +
tokens[i] + " - " +
e.getMessage ());
}
tokens[i] + " - " +e.getMessage ());
}
}
else
throw new ReadFormatException ("line has the wrong number of fields:\n"+line);
++count;
if(FORMAT == BASE_SPECIFIED_FORMAT)
dataMap.put(base, line_data);
}
if (FORMAT == BASE_PER_LINE_FORMAT)
average_value /= data[0].length * seqLength;
else
{
average_value = average_value/countAll;
if(estimate_window_size != Integer.MAX_VALUE)
default_window_size = estimate_window_size;
}
}
/**
......@@ -179,6 +231,27 @@ public class UserDataAlgorithm extends BaseAlgorithm
start = getStrand().getBases().getComplementPosition(tend);
}
if(FORMAT == BASE_SPECIFIED_FORMAT)
{
for (int i = 0 ; i < value_count ; ++i)
{
values[i] = 0;
int count = 0;
for (int base = start ; base <= end ; ++base)
{
if(dataMap.containsKey(base))
{
values[i] += ((float[])dataMap.get(base))[i];
count++;
}
}
if(count > 1)
values[i] = values[i]/count;
}
}
else
{
for (int i = 0 ; i < value_count ; ++i)
{
values [i] = 0;
......@@ -186,6 +259,7 @@ public class UserDataAlgorithm extends BaseAlgorithm
values [i] += data[base - 1][i] / (end - start + 1);
}
}
}
/**
* Return the number of values a call to getValues () will return - one
......@@ -193,6 +267,8 @@ public class UserDataAlgorithm extends BaseAlgorithm
**/
public int getValueCount ()
{
  // In the base-specified format the first column holds the base position
  // rather than a plottable value, so it is excluded from the count.
  final boolean first_column_is_position = (FORMAT == BASE_SPECIFIED_FORMAT);
  return first_column_is_position ? number_of_values - 1 : number_of_values;
}
......@@ -203,7 +279,7 @@ public class UserDataAlgorithm extends BaseAlgorithm
**/
public Integer getDefaultWindowSize ()
{
  // default_window_size starts at 3 and, for the base-specified format, is
  // replaced in readData () by the smallest positive gap between base
  // positions.  A leftover hard-coded "return 3" ahead of this statement
  // made it unreachable, so only the field-backed return is kept.
  return Integer.valueOf (default_window_size);
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment