Newer
Older
package uk.ac.sanger.artemis.components.alignment;
import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Point;
import java.awt.Toolkit;
import java.io.File;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JProgressBar;
import uk.ac.sanger.artemis.Entry;
import uk.ac.sanger.artemis.EntryGroup;
import uk.ac.sanger.artemis.Feature;
import uk.ac.sanger.artemis.FeatureVector;
import uk.ac.sanger.artemis.components.FeatureDisplay;
import uk.ac.sanger.artemis.components.FileViewer;
import uk.ac.sanger.artemis.components.SwingWorker;
import uk.ac.sanger.artemis.io.EntryInformationException;
import uk.ac.sanger.artemis.io.Key;
import uk.ac.sanger.artemis.io.Range;
import uk.ac.sanger.artemis.sequence.Bases;
import uk.ac.sanger.artemis.sequence.MarkerRange;
import uk.ac.sanger.artemis.util.OutOfRangeException;
import uk.ac.sanger.artemis.util.ReadOnlyException;
import net.sf.samtools.SAMFileReader;
public class MappedReads
{
private JProgressBar progressBar;
private JLabel progressTxt = new JLabel();
private FeatureVector features;
private String refName;
private Hashtable<String, SAMFileReader> samFileReaderHash;
private List<String> bamList;
private List<Integer> hideBamList;
private int sequenceLength;
private SAMRecordPredicate samRecordFlagPredicate;
private SAMRecordMapQPredicate samRecordMapQPredicate;
private boolean contained;
private boolean useIntrons;
private JDialog dialog = new JDialog((JFrame)null, "Calculating", true);;
private int mappedReads[];
/**
* Calculate the total number of mapped reads.
* @param refName
* @param samFileReaderHash
* @param bamList
* @param seqNames
* @param offsetLengths
* @param concatSequences
* @param seqLengths
* @param sequenceLength
*/
public MappedReads(
final FeatureVector features,
final String refName,
final Hashtable<String, SAMFileReader> samFileReaderHash,
final List<String> bamList,
final Vector<String> seqNames,
final int sequenceLength,
final SAMRecordPredicate samRecordFlagPredicate,
SAMRecordMapQPredicate samRecordMapQPredicate,
final boolean contained,
final boolean useIntrons,
final boolean allRefSeqs)
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
{
this.features = features;
this.refName = refName;
this.samFileReaderHash = samFileReaderHash;
this.bamList = bamList;
this.seqNames = seqNames;
this.offsetLengths = offsetLengths;
this.concatSequences = concatSequences;
this.seqLengths = seqLengths;
this.sequenceLength = sequenceLength;
this.samRecordFlagPredicate = samRecordFlagPredicate;
this.samRecordMapQPredicate = samRecordMapQPredicate;
this.contained = contained;
this.useIntrons = useIntrons;
progressBar = new JProgressBar(0, sequenceLength);
progressBar.setValue(0);
progressBar.setStringPainted(true);
JPanel panel = new JPanel(new BorderLayout());
progressTxt.setText("Total number of mapped reads");
panel.add(progressTxt, BorderLayout.NORTH);
panel.add(progressBar, BorderLayout.CENTER);
dialog.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
panel.setOpaque(true);
dialog.setContentPane(panel);
dialog.pack();
centerDialog();
CalculateTotalMappedReads cmr = new CalculateTotalMappedReads(allRefSeqs);
cmr.start();
dialog.setVisible(true);
}
/**
* Read count for selected features.
* @param features
* @param refName
* @param samFileReaderHash
* @param bamList
* @param seqNames
* @param offsetLengths
* @param concatSequences
* @param seqLengths
* @param samRecordFlagPredicate
* @param samRecordMapQPredicate
* @param contained
* @param useIntrons
*/
public MappedReads(
final FeatureVector features,
final String refName,
final Hashtable<String, SAMFileReader> samFileReaderHash,
final List<String> bamList,
final Vector<String> seqNames,
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
final SAMRecordPredicate samRecordFlagPredicate,
final SAMRecordMapQPredicate samRecordMapQPredicate,
final boolean contained,
final boolean useIntrons)
{
this.features = features;
this.refName = refName;
this.samFileReaderHash = samFileReaderHash;
this.bamList = bamList;
this.seqNames = seqNames;
this.offsetLengths = offsetLengths;
this.concatSequences = concatSequences;
this.seqLengths = seqLengths;
this.samRecordFlagPredicate = samRecordFlagPredicate;
this.samRecordMapQPredicate = samRecordMapQPredicate;
this.contained = contained;
this.useIntrons = useIntrons;
progressBar = new JProgressBar(0, features.size());
progressBar.setValue(0);
progressBar.setStringPainted(true);
JPanel panel = new JPanel(new BorderLayout());
progressTxt.setText("Number of mapped reads for "+features.size()+" features");
panel.add(progressTxt, BorderLayout.NORTH);
panel.add(progressBar, BorderLayout.CENTER);
panel.setOpaque(true);
dialog.setContentPane(panel);
dialog.pack();
centerDialog();
CalculateMappedReads cmr = new CalculateMappedReads();
cmr.start();
dialog.setVisible(true);
}
/**
* Search for new features based on a threshold of read counts in the intergenic
* and anti-sense regions of existing annotations. This should exclude rRNA and
* tRNA regions. If two or more BAM files are loaded it should create features
* based on the combined read peak span.
* @param refName
* @param bamView
* @param samFileReaderHash
* @param bamList
* @param seqNames
* @param offsetLengths
* @param concatSequences
* @param seqLengths
* @param groupsFrame
* @param threshold
* @param minSize
* @param minBams
public MappedReads(
final BamView bamView,
final Hashtable<String, SAMFileReader> samFileReaderHash,
final Vector<String> seqNames,
final HashMap<String, Integer> offsetLengths,
final boolean concatSequences,
final HashMap<String, Integer> seqLengths,
final GroupBamFrame groupsFrame,
final int threshold,
final int minSize,
final int minBams,
final boolean contained)
{
this.refName = refName;
this.samFileReaderHash = samFileReaderHash;
this.bamList = bamView.bamList;
this.hideBamList = bamView.hideBamList;
this.seqNames = seqNames;
this.offsetLengths = offsetLengths;
this.concatSequences = concatSequences;
this.seqLengths = seqLengths;
this.samRecordFlagPredicate = bamView.getSamRecordFlagPredicate();
this.samRecordMapQPredicate = bamView.getSamRecordMapQPredicate();
final FeatureDisplay feature_display = bamView.getFeatureDisplay();
progressBar = new JProgressBar(0, feature_display.getSequenceLength());
progressBar.setValue(0);
progressBar.setStringPainted(true);
JPanel panel = new JPanel(new BorderLayout());
progressTxt.setText("");
panel.add(progressTxt, BorderLayout.NORTH);
panel.add(progressBar, BorderLayout.CENTER);
panel.setOpaque(true);
dialog.setTitle("Search");
dialog.setContentPane(panel);
dialog.pack();
centerDialog();
final CalculateNewFeatures cmr = new CalculateNewFeatures(
feature_display, refName, groupsFrame, threshold, minSize, minBams);
private void centerDialog()
{
final Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
final int x_position =(screen.width - dialog.getSize().width) / 2;
int y_position =(screen.height - dialog.getSize().height) / 2;
if(y_position < 10)
y_position = 10;
dialog.setLocation(new Point(x_position, y_position));
}
class CalculateMappedReads extends SwingWorker
{
Hashtable<String, List<ReadCount>> featureReadCount;
featureReadCount = new Hashtable<String, List<ReadCount>>();
for (int i = 0; i < features.size(); i++)
{
Feature f = features.elementAt(i);
progressBar.setValue(i);
int start = f.getRawFirstBase();
int end = f.getRawLastBase();
float fLen = BamUtils.getFeatureLength(f);
List<ReadCount> sampleCounts = new Vector<ReadCount>();
for (int j = 0; j < bamList.size(); j++)
{
String bam = bamList.get(j);
float cnt[] = new float[2];
if (!useIntrons && f.getSegments().size() > 1)
{
for (int k = 0; k < f.getSegments().size(); k++)
{
start = f.getSegments().elementAt(k).getRawRange().getStart();
end = f.getSegments().elementAt(k).getRawRange().getEnd();
float tmpcnt[] = new float[2];
tmpcnt = BamUtils.getCount(start, end, bam, refName, samFileReaderHash,
seqNames, offsetLengths, concatSequences, seqLengths,
samRecordFlagPredicate, samRecordMapQPredicate, contained);
cnt[0] += tmpcnt[0];
cnt[1] += tmpcnt[1];
}
}
else
cnt = BamUtils.getCount(start, end, bam, refName, samFileReaderHash, seqNames,
offsetLengths, concatSequences, seqLengths,
samRecordFlagPredicate, samRecordMapQPredicate, contained);
if (mappedReads != null)
{
cnt[0] = (cnt[0] / (((float) mappedReads[j] / 1000000.f) * (fLen / 1000.f)));
cnt[1] = (cnt[1] / (((float) mappedReads[j] / 1000000.f) * (fLen / 1000.f)));
}
sampleCounts.add( new ReadCount(cnt, f.isForwardFeature()) );
featureReadCount.put(ReadCountDialog.getFeatureName(f), sampleCounts);
final DecimalFormat df;
if(mappedReads != null)
df = new DecimalFormat("0.000");
else
df = new DecimalFormat("0");
StringBuffer buff = new StringBuffer();
for (int j = 0; j < bamList.size(); j++)
{
String bam = bamList.get(j);
buff.append("#BAM: " + bam);
if (mappedReads != null)
{
final DecimalFormat df2 = new DecimalFormat("0.000000");
+ df2.format(((float) mappedReads[j]) / 1000000.f));
}
buff.append("\n");
}
buff.append("\n");
Enumeration<String> enumFeat = featureReadCount.keys();
int n = 0;
while (enumFeat.hasMoreElements())
String fId = enumFeat.nextElement();
if(n == 0)
{
for (int i = 0; i < ((String)fId).length(); i++)
buff.append(" ");
buff.append("\t");
for (int j = 0; j < bamList.size(); j++)
{
if(mappedReads != null)
buff.append(" Sense Antisense Total\t");
else
buff.append(" Sense Antisense Total\t");
}
buff.append("\n");
}
List<ReadCount> cnts = featureReadCount.get(fId);
for (ReadCount c: cnts)
{
pad(buff, c.senseCnt, df);
buff.append(" ");
pad(buff, c.antiCnt, df);
buff.append(" ");
pad(buff, c.senseCnt+c.antiCnt, df);
buff.append("\t");
}
}
FileViewer viewer;
if (mappedReads != null)
viewer = new FileViewer("RPKM", true, false, true);
else
viewer = new FileViewer("Read Count", true, false, true);
viewer.getTextPane().setText(buff.toString());
dialog.dispose();
}
}
private static void pad(StringBuffer buff, float f, final DecimalFormat df)
{
if(f < 10)
buff.append(" ");
else if(f < 100)
buff.append(" ");
else if(f < 1000)
buff.append(" ");
else if(f < 10000)
buff.append(" ");
else if(f < 100000)
buff.append(" ");
else if(f < 1000000)
buff.append(" ");
buff.append(df.format(f));
}
class CalculateTotalMappedReads extends SwingWorker
{
private boolean useAllRefSeqs;
CalculateTotalMappedReads(boolean useAllRefSeqs)
{
this.useAllRefSeqs = useAllRefSeqs;
}
public Object construct()
{
mappedReads = new int[bamList.size()];
if(concatSequences || !useAllRefSeqs)
calc(refName, sequenceLength);
else
{
for (String name : seqNames)
{
progressTxt.setText(name);
int thisLength = seqLengths.get(name);
progressBar.setValue(0);
progressBar.setMaximum(thisLength);
calc(name, thisLength);
}
}
progressBar.setValue(0);
progressBar.setMaximum(features.size());
progressTxt.setText("RPKM values for "+features.size()+" features");
CalculateMappedReads cmr = new CalculateMappedReads();
cmr.start();
return null;
}
{
int MAX_BASE_CHUNK = 2000 * 60;
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
boolean contained = false;
for (int i = 0; i < sequenceLength; i += MAX_BASE_CHUNK)
{
progressBar.setValue(i);
int sbegc = i;
int sendc = i + MAX_BASE_CHUNK - 1;
for (int j = 0; j < bamList.size(); j++)
{
String bam = bamList.get(j);
if (concatSequences)
{
int len = 0;
int lastLen = 1;
for (String name : seqNames)
{
int thisLength = seqLengths.get(name);
len += thisLength;
if ((lastLen >= sbegc && lastLen < sendc)
|| (len >= sbegc && len < sendc)
|| (sbegc >= lastLen && sbegc < len)
|| (sendc >= lastLen && sendc < len))
{
int offset = offsetLengths.get(name);
int thisStart = sbegc - offset;
if (thisStart < 1)
thisStart = 1;
int thisEnd = sendc - offset;
if (thisEnd > thisLength)
thisEnd = thisLength;
mappedReads[j] += BamUtils.count(bam, samFileReaderHash, name,
thisStart, thisEnd, samRecordFlagPredicate,
samRecordMapQPredicate, contained, false)[0];
}
lastLen = len;
}
}
else
{
mappedReads[j] += BamUtils.count(bam, samFileReaderHash, refName, sbegc,
sendc, samRecordFlagPredicate, samRecordMapQPredicate,
/**
* Find new features from read count peaks.
*/
class CalculateNewFeatures extends SwingWorker
{
private EntryGroup entryGroup;
private Bases bases;
private String refSeq;
private int threshold;
private int minSize;
private int minBams;
private GroupBamFrame groupsFrame;
CalculateNewFeatures(final FeatureDisplay feature_display,
final String refSeq,
final GroupBamFrame groupsFrame,
final int threshold,
final int minSize,
final int minBams)
{
entryGroup = feature_display.getEntryGroup();
bases = feature_display.getBases();
this.refSeq = refSeq;
this.groupsFrame = groupsFrame;
this.threshold = threshold;
this.minSize = minSize;
this.minBams = minBams;
}
class MarkerObj
{
MarkerRange r;
int bamIdx;
MarkerObj(MarkerRange r, int bamIdx)
{
this.r = r;
this.bamIdx = bamIdx;
}
final int MAX_BASE_CHUNK = 2000 * 60;
Key excluseKeys[] = { new Key("rRNA"), new Key("tRNA") };
final int beg = 1;
final int end = bases.getLength();
final List<MarkerObj> fwdMarkers = new Vector<MarkerObj>();
final List<MarkerObj> revMarkers = new Vector<MarkerObj>();
if(hideBamList.contains(i))
continue;
for(int j=beg; j<end; j+=MAX_BASE_CHUNK)
{
progressBar.setValue((j + (i*end)) / bamList.size());
if(j > end)
continue;
int start = j;
int stop = j+MAX_BASE_CHUNK;
try
{
int cnt[][] = new int[stop-start+1][2];
for (int row = 0; row < cnt.length; row++)
for (int col = 0; col < 2; col++)
cnt[row][col] = 0;
if (concatSequences)
{
for (String name : seqNames)
{
int len = seqLengths.get(name);
int offset = offsetLengths.get(name);
if( (start >= offset && start <= offset+len) ||
(stop >= offset && start <= offset+len) )
{
int thisStart = start-offset;
if(thisStart < 1)
thisStart = 1;
int thisEnd = stop-offset;
if(thisEnd > len)
thisEnd = len;
int concatShift = 0;
if(offset > start)
concatShift = offset-start;
cnt =
BamUtils.countOverRange(bamList.get(i), samFileReaderHash,
name, thisStart, thisEnd, concatShift, cnt,
samRecordFlagPredicate, samRecordMapQPredicate);
}
}
}
else
{
cnt =
BamUtils.countOverRange(bamList.get(i), samFileReaderHash,
refSeq, start, stop, 0, cnt,
samRecordFlagPredicate, samRecordMapQPredicate);
}
for(int k=0; k<cnt.length; k++)
{
final Range r = new Range(start+k, start+k+1);
// find forward strand potential features
fwdStart = findFeatures(cnt[k][0], true, fwdStart, j+k,
r, excluseKeys, fwdMarkers, entryGroup, i);
// find reverse strand potential features
revStart = findFeatures(cnt[k][1], false, revStart, j+k,
r, excluseKeys, revMarkers, entryGroup, i);
}
}
catch (OutOfRangeException e1)
{
e1.printStackTrace();
}
}
}
final Entry newEntry = entryGroup.createEntry ("align_"+threshold+"_"+minBams+"_"+minSize);
createFeatures(fwdMarkers, true, newEntry);
createFeatures(revMarkers, false, newEntry);
return null;
}
private void createFeatures(final List<MarkerObj> markers, final boolean isFwd, final Entry newEntry)
final Key key = Key.CDS;
final Set<Integer> ignoreMarkers = new HashSet<Integer>();
for(int i=0; i<markers.size(); i++)
{
if(ignoreMarkers.contains(i))
continue;
Set<Integer> bamIdxList = new HashSet<Integer>();
MarkerObj mi = markers.get(i);
MarkerRange mri = mi.r;
bamIdxList.add(mi.bamIdx);
for(int j=i+1; j<markers.size(); j++)
{
if(ignoreMarkers.contains(j))
continue;
MarkerObj mj = markers.get(j);
MarkerRange mrj = mj.r;
if(mri.overlaps(mrj))
bamIdxList.add(mj.bamIdx);
mri = mri.combineRanges(mrj, false);
if(bamIdxList.size() >= minBams)
if(groupsFrame != null && minBams > 1)
{
boolean foundMinBams = false;
Hashtable<String, Integer> groupCluster = new Hashtable<String, Integer>();
for(int j=0; j<bamIdxList.size(); j++)
{
File f = new File(bamList.get(j));
String grp = groupsFrame.getGroupName( f.getName() );
if(groupCluster.containsKey(grp))
{
int val = groupCluster.get(grp)+1;
if(val >= minBams)
{
foundMinBams = true;
break;
}
groupCluster.put(grp, val);
}
else
groupCluster.put(grp, 1);
}
if(!foundMinBams)
continue;
}
// create a new feature
try
{
newEntry.createFeature(key,
( isFwd ? mri.createLocation() : mri.createLocation().getComplement()), null);
}
catch (ReadOnlyException e)
{
e.printStackTrace();
}
catch (EntryInformationException e)
{
e.printStackTrace();
}
catch (OutOfRangeException e)
{
e.printStackTrace();
}
}
}
}
/**
* Search for new features based on read counts.
* @param cnt read count
* @param isFwd strand to find features on is the forward strand if true
* @param fStart start of a new feature or -1 if not identified yet
* @param pos current base position
* @param range current base range
* @param excluseKeys feature keys of regions to avoid looking in
* @param markers list of new features
* @param entryGroup entry group
* @return
*/
private int findFeatures(final int cnt,
final boolean isFwd,
int fStart,
final int pos,
final Range range,
final Key excluseKeys[],
final List<MarkerObj> markers,
final EntryGroup entryGroup,
final int bamIdx)
if(cnt >= threshold && fStart == -1) // START FEATURE
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
{
boolean exclude = false;
try
{
final FeatureVector features =
entryGroup.getFeaturesInRange(range);
for(int k=0; k<features.size(); k++)
{
Feature f = features.elementAt(k);
if( f.isProteinFeature() && ! (isFwd ^ f.isForwardFeature()) ) // on same strand
return fStart;
for(int l=0; l<excluseKeys.length; l++)
if(f.getKey().equals(excluseKeys[l]))
exclude = true;
}
}
catch (OutOfRangeException e1){ }
if(!exclude)
fStart = range.getStart();
}
else if(cnt < threshold && fStart != -1)
{
try
{
boolean exclude = false;
final FeatureVector features = entryGroup.getFeaturesInRange(range);
for(int k=0; k<features.size(); k++)
{
Feature f = features.elementAt(k);
for(int l=0; l<excluseKeys.length; l++)
if(f.getKey().equals(excluseKeys[l]))
exclude = true;
}
if(!exclude &&
(range.getStart()-fStart) >= minSize)
{
final MarkerRange marker = new MarkerRange(
bases.getForwardStrand(), fStart, range.getStart());
markers.add( new MarkerObj(marker, bamIdx) );
}
}
catch (OutOfRangeException e1){}
return -1;
}
else if (fStart != -1)
{
try
{
FeatureVector features = entryGroup.getFeaturesInRange(range);
for(int k=0; k<features.size(); k++)
{
boolean exclude = false;
for(int l=0; l<excluseKeys.length; l++)
if(f.getKey().equals(excluseKeys[l]))
exclude = true;
if( exclude ||
(f.isProteinFeature() && ! (isFwd ^ f.isForwardFeature())) ) // on same strand
if((f.getRawFirstBase()-fStart) >= minSize)
{
final MarkerRange marker = new MarkerRange(
bases.getForwardStrand(), fStart, f.getRawFirstBase() );
markers.add( new MarkerObj(marker, bamIdx) );
}
return -1;
}
}
}
catch (OutOfRangeException e)
{
e.printStackTrace();
}
}
return fStart;
}
public void finished()
{
dialog.dispose();
}
}
class ReadCount
{
private float senseCnt = 0;
private float antiCnt = 0;
ReadCount(float[] cnt, boolean isFwd)
{
if(isFwd)
{
senseCnt = cnt[0];
antiCnt = cnt[1];
}
else
{
senseCnt = cnt[1];
antiCnt = cnt[0];
}
}
}