diff --git a/uk/ac/sanger/artemis/components/alignment/BamUtils.java b/uk/ac/sanger/artemis/components/alignment/BamUtils.java index 09c10684be27207ed49ea1396807a613b0074fa6..b4ddf4df1d304b29364105407f8012c519a21f66 100644 --- a/uk/ac/sanger/artemis/components/alignment/BamUtils.java +++ b/uk/ac/sanger/artemis/components/alignment/BamUtils.java @@ -23,21 +23,30 @@ */ package uk.ac.sanger.artemis.components.alignment; +import java.awt.Container; +import java.awt.Frame; +import java.text.DecimalFormat; import java.util.Hashtable; import java.util.List; import java.util.Vector; +import javax.swing.JFrame; + import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import uk.ac.sanger.artemis.Feature; +import uk.ac.sanger.artemis.FeatureSegmentVector; import uk.ac.sanger.artemis.FeatureVector; +import uk.ac.sanger.artemis.components.EntryEdit; import uk.ac.sanger.artemis.components.FileViewer; +import uk.ac.sanger.artemis.components.MultiComparator; +import uk.ac.sanger.artemis.io.Range; class BamUtils { /** - * + * Read count for selected reads. * @param features * @param refName * @param samFileReaderHash @@ -50,6 +59,7 @@ class BamUtils * @param samRecordMapQPredicate * @param contained * @param useIntrons + * @param mappedReads */ protected static void countReads(final FeatureVector features, final String refName, @@ -62,26 +72,30 @@ class BamUtils final SAMRecordFlagPredicate samRecordFlagPredicate, final SAMRecordMapQPredicate samRecordMapQPredicate, final boolean contained, - final boolean useIntrons) - { - Hashtable<String, List<Integer>> featureReadCount = new Hashtable<String, List<Integer>>(); + final boolean useIntrons, + final int mappedReads[]) + { + Hashtable<String, List<Float>> featureReadCount = new Hashtable<String, List<Float>>(); + for(int i=0; i<features.size(); i++) { Feature f = features.elementAt(i); - int start = f.getFirstBase(); - int end = f.getLastBase(); - List<Integer> sampleCounts = new Vector<Integer>(); + int start = f.getFirstBase(); + int end = f.getLastBase(); + float fLenKb = getFeatureLength(f); + List<Float> sampleCounts = new Vector<Float>(); - for(String bam : bamList) + for(int j=0; j<bamList.size(); j++) { - int cnt = 0; + String bam = bamList.get(j); + float cnt = 0; if(!useIntrons && f.getSegments().size() > 1) { - for(int j=0; j<f.getSegments().size(); j++) + for(int k=0; k<f.getSegments().size(); k++) { - start = f.getSegments().elementAt(j).getStart().getPosition(); - end = f.getSegments().elementAt(j).getEnd().getPosition(); + start = f.getSegments().elementAt(k).getStart().getPosition(); + end = f.getSegments().elementAt(k).getEnd().getPosition(); cnt += getCount(start, end, bam, refName, samFileReaderHash, seqNames, offsetLengths, concatSequences, seqLengths, samRecordFlagPredicate, samRecordMapQPredicate, contained); @@ -91,13 +105,16 @@ class BamUtils cnt = getCount(start, end, bam, refName, samFileReaderHash, seqNames, offsetLengths, concatSequences, seqLengths, samRecordFlagPredicate, samRecordMapQPredicate, contained); + + if(mappedReads != null) + cnt = cnt / ((((float)mappedReads[j]) / 1000000.f)*fLenKb); sampleCounts.add(cnt); } - featureReadCount.put(f.getSystematicName(), sampleCounts); } + DecimalFormat df = new DecimalFormat("0.00##"); StringBuffer buff = new StringBuffer(); for(String bam : bamList) buff.append("#BAM: "+bam+"\n"); @@ -105,16 +122,32 @@ class BamUtils for (String fId : featureReadCount.keySet() ) { buff.append(fId+"\t"); - List<Integer> cnts = featureReadCount.get(fId); + List<Float> cnts = featureReadCount.get(fId); for(int i=0; i<cnts.size(); i++) - buff.append(cnts.get(i)+(i<cnts.size()-1 ? "\t" : "")); + buff.append(df.format(cnts.get(i)) + (i<cnts.size()-1 ? "\t" : "")); buff.append("\n"); } - FileViewer viewer = new FileViewer ("Read Count", true, false, true); + FileViewer viewer; + if(mappedReads != null) + viewer = new FileViewer ("RPKM", true, false, true); + else + viewer = new FileViewer ("Read Count", true, false, true); viewer.getTextPane().setText(buff.toString()); } + private static float getFeatureLength(Feature f) + { + FeatureSegmentVector segs = f.getSegments(); + int len = 0; + for(int i=0; i<segs.size(); i++) + { + Range r = segs.elementAt(i).getRawRange(); + len += r.getEnd()-r.getStart(); + } + return ((float)len) / 1000.f; + } + /** * Count the reads in a range. * @param start @@ -183,6 +216,81 @@ class BamUtils return cnt; } + /** + * Calculate the total number of mapped reads. + * @param refName + * @param samFileReaderHash + * @param bamList + * @param seqNames + * @param offsetLengths + * @param concatSequences + * @param seqLengths + * @param sequenceLength + * @return + */ + protected static int[] getTotalMappedReads( + final String refName, + final Hashtable<String, SAMFileReader> samFileReaderHash, + final List<String> bamList, + final Vector<String> seqNames, + final Hashtable<String, Integer> offsetLengths, + final boolean concatSequences, + final Hashtable<String, Integer> seqLengths, + final int sequenceLength) + { + int MAX_BASE_CHUNK = 2000*60; + int mapped[] = new int[bamList.size()]; + boolean contained = false; + SAMRecordFlagPredicate samRecordFlagPredicate = new SAMRecordFlagPredicate(SAMRecordFlagPredicate.READ_UNMAPPED_FLAG); + SAMRecordMapQPredicate samRecordMapQPredicate = new SAMRecordMapQPredicate(-1); + + for (int i = 0; i < sequenceLength; i += MAX_BASE_CHUNK) + { + int sbegc = i; + int sendc = i + MAX_BASE_CHUNK - 1; + + for (int j=0; j<bamList.size(); j++) + { + String bam = bamList.get(j); + if (concatSequences) + { + int len = 0; + int lastLen = 1; + for (String name : seqNames) + { + int thisLength = seqLengths.get(name); + len += thisLength; + + if ((lastLen >= sbegc && lastLen < sendc) + || (len >= sbegc && len < sendc) + || (sbegc >= lastLen && sbegc < len) + || (sendc >= lastLen && sendc < len)) + { + int offset = offsetLengths.get(name); + int thisStart = sbegc - offset; + if (thisStart < 1) + thisStart = 1; + int thisEnd = sendc - offset; + if (thisEnd > thisLength) + thisEnd = thisLength; + + mapped[j] += count(bam, samFileReaderHash, name, thisStart, thisEnd, + samRecordFlagPredicate, samRecordMapQPredicate, contained); + + } + lastLen = len; + } + } + else + { + mapped[j] += count(bam, samFileReaderHash, refName, sbegc, sendc, + samRecordFlagPredicate, samRecordMapQPredicate, contained); + } + } + } + return mapped; + } + private static int count(String bam, Hashtable<String, SAMFileReader> samFileReaderHash, String refName, @@ -212,5 +320,18 @@ class BamUtils it.close(); return cnt; } + + protected static Container getBamContainer(BamView bamView) + { + Frame fs[] = JFrame.getFrames(); + for(Frame f: fs) + { + if( f instanceof JFrame && + ((JFrame)f) instanceof EntryEdit || + ((JFrame)f) instanceof MultiComparator) + return ((JFrame)f).getContentPane(); + } + return bamView; + } } diff --git a/uk/ac/sanger/artemis/components/alignment/BamView.java b/uk/ac/sanger/artemis/components/alignment/BamView.java index cee7aefdc7698da0dae4bf5f25109fb3a68cb1c4..dc79b8faa2a836183f3a1265d74c3407d05706c7 100644 --- a/uk/ac/sanger/artemis/components/alignment/BamView.java +++ b/uk/ac/sanger/artemis/components/alignment/BamView.java @@ -29,6 +29,8 @@ import java.awt.BorderLayout; import java.awt.Color; import java.awt.Component; import java.awt.Composite; +import java.awt.Container; +import java.awt.Cursor; import java.awt.Dimension; import java.awt.FlowLayout; import java.awt.FontMetrics; @@ -2004,12 +2006,14 @@ public class BamView extends JPanel final JMenu analyse = new JMenu("Analyse"); menu.add(analyse); - final JMenuItem readCount = new JMenuItem("Read count for selected features ..."); + final JMenuItem readCount = new JMenuItem("Read count of selected features ..."); analyse.add(readCount); readCount.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { + Container c = BamUtils.getBamContainer(BamView.this); + c.setCursor(new Cursor(Cursor.WAIT_CURSOR)); FeatureVector features = feature_display.getSelection().getAllFeatures(); JCheckBox overlap = new JCheckBox("Include all overlapping reads", true); @@ -2023,7 +2027,46 @@ public class BamView extends JPanel BamUtils.countReads(features, (String)combo.getSelectedItem(), samFileReaderHash, bamList, seqNames, offsetLengths, concatSequences, seqLengths, samRecordFlagPredicate, samRecordMapQPredicate, - !overlap.isSelected(), spliced.isSelected()); + !overlap.isSelected(), spliced.isSelected(), null); + c.setCursor(new Cursor(Cursor.DEFAULT_CURSOR)); + } + }); + + final JMenuItem rpkm = new JMenuItem("RPKM value of selected features ..."); + analyse.add(rpkm); + rpkm.addActionListener(new ActionListener() + { + public void actionPerformed(ActionEvent e) + { + Container c = BamUtils.getBamContainer(BamView.this); + c.setCursor(new Cursor(Cursor.WAIT_CURSOR)); + FeatureVector features = feature_display.getSelection().getAllFeatures(); + + JCheckBox overlap = new JCheckBox("Include all overlapping reads", true); + overlap.setToolTipText("Include reads that partially overlap the feature"); + JCheckBox spliced = new JCheckBox("Introns included", true); + Box yBox = Box.createVerticalBox(); + yBox.add(overlap); + yBox.add(spliced); + JOptionPane.showMessageDialog(null, yBox, "Read Count Option", JOptionPane.INFORMATION_MESSAGE); + + int seqlen = 0; + if(feature_display != null) + seqlen = feature_display.getSequenceLength(); + else if(bases != null) + seqlen = bases.getLength(); + + int mappedReads[] = + BamUtils.getTotalMappedReads((String)combo.getSelectedItem(), + samFileReaderHash, bamList, seqNames, offsetLengths, concatSequences, + offsetLengths, seqlen); + + logger4j.debug("TOTAL MAPPED READS "+mappedReads); + BamUtils.countReads(features, (String)combo.getSelectedItem(), samFileReaderHash, bamList, + seqNames, offsetLengths, concatSequences, seqLengths, + samRecordFlagPredicate, samRecordMapQPredicate, + !overlap.isSelected(), spliced.isSelected(), mappedReads); + c.setCursor(new Cursor(Cursor.DEFAULT_CURSOR)); } });