diff --git a/uk/ac/sanger/artemis/components/alignment/BamUtils.java b/uk/ac/sanger/artemis/components/alignment/BamUtils.java index 6ae9a7916038832b89e1c7c601cd88e81ca2a5b1..46f60f9d910a3e704555eebe93b43c7ab0e35b72 100644 --- a/uk/ac/sanger/artemis/components/alignment/BamUtils.java +++ b/uk/ac/sanger/artemis/components/alignment/BamUtils.java @@ -25,8 +25,10 @@ package uk.ac.sanger.artemis.components.alignment; import java.util.HashMap; import java.util.Hashtable; +import java.util.List; import java.util.Vector; +import net.sf.samtools.AlignmentBlock; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; @@ -117,14 +119,14 @@ class BamUtils return cnt; } - protected static int count(String bam, - Hashtable<String, SAMFileReader> samFileReaderHash, - String refName, - int start, - int end, - SAMRecordPredicate samRecordFlagPredicate, - SAMRecordPredicate samRecordMapQPredicate, - boolean contained) + protected static int count(final String bam, + final Hashtable<String, SAMFileReader> samFileReaderHash, + final String refName, + final int start, + final int end, + final SAMRecordPredicate samRecordFlagPredicate, + final SAMRecordPredicate samRecordMapQPredicate, + final boolean contained) { int cnt = 0; SAMFileReader inputSam = samFileReaderHash.get(bam); @@ -139,7 +141,7 @@ class BamUtils if(samRecordMapQPredicate == null || samRecordMapQPredicate.testPredicate(samRecord)) { - cnt++; + cnt++; } } } @@ -147,5 +149,70 @@ class BamUtils return cnt; } + /** + * Return the coverage for each base in a range for the forward and + * reverse strand. + * @param bamFile + * @param samFileReaderHash + * @param refName + * @param start + * @param end + * @param samRecordFlagPredicate + * @param samRecordMapQPredicate + * @return + */ + protected static int[][] countOverRange(final String bamFile, + final Hashtable<String, SAMFileReader> samFileReaderHash, + final String refName, + final int start, + final int end, + final SAMRecordPredicate samRecordFlagPredicate, + final SAMRecordPredicate samRecordMapQPredicate) + { + final int cnt[][] = new int[end-start+1][2]; + + for (int i = 0; i < cnt.length; i++) + for (int j = 0; j < 2; j++) + cnt[i][j] = 0; + + SAMFileReader inputSam = samFileReaderHash.get(bamFile); + final CloseableIterator<SAMRecord> it = + inputSam.query(refName, start, end, false); + + while (it.hasNext()) + { + SAMRecord samRecord = it.next(); + if (samRecordFlagPredicate == null + || !samRecordFlagPredicate.testPredicate(samRecord)) + { + if (samRecordMapQPredicate == null + || samRecordMapQPredicate.testPredicate(samRecord)) + { + List<AlignmentBlock> blocks = samRecord.getAlignmentBlocks(); + boolean isFwd = !samRecord.getReadNegativeStrandFlag(); + + for(int j=0; j<blocks.size(); j++) + { + AlignmentBlock block = blocks.get(j); + int refStart = block.getReferenceStart(); + for(int i=0; i<block.getLength(); i++) + { + int pos = refStart + i; + int bin = pos - start; + if(bin < 0 || bin > cnt.length-1) + continue; + + if(isFwd) + cnt[bin][0]++; + else + cnt[bin][1]++; + } + } + } + } + } + it.close(); + return cnt; + } } diff --git a/uk/ac/sanger/artemis/components/alignment/BamView.java b/uk/ac/sanger/artemis/components/alignment/BamView.java index b0b75249c88a6da5602938aef61a55c58923cd12..1a116ea0043aa74b63750279f5ae9d8457c3c171 100644 --- a/uk/ac/sanger/artemis/components/alignment/BamView.java +++ b/uk/ac/sanger/artemis/components/alignment/BamView.java @@ -2305,7 +2305,7 @@ public class BamView extends JPanel { public void actionPerformed(ActionEvent e) { - FeatureVector features = feature_display.getSelection().getAllFeatures(); + final FeatureVector features = feature_display.getSelection().getAllFeatures(); JCheckBox overlap = new JCheckBox("Include all overlapping reads", true); overlap.setToolTipText("Include reads that partially overlap the feature"); @@ -2336,6 +2336,24 @@ public class BamView extends JPanel !overlap.isSelected(), spliced.isSelected(), allRefSeqs.isSelected()); } }); + + + + final JMenuItem createFeatures = new JMenuItem("Create features from coverage peaks ..."); + analyse.add(createFeatures); + createFeatures.addActionListener(new ActionListener() + { + public void actionPerformed(ActionEvent e) + { + if(feature_display == null) + return; + + new MappedReads(feature_display, (String)combo.getSelectedItem(), + samFileReaderHash, bamList, seqNames, offsetLengths, + concatSequences, seqLengths, samRecordFlagPredicate, samRecordMapQPredicate, + true); + } + }); for(int i=0; i<bamList.size(); i++) addToViewMenu(i); diff --git a/uk/ac/sanger/artemis/components/alignment/MappedReads.java b/uk/ac/sanger/artemis/components/alignment/MappedReads.java index 577a64d9d1ebbdeea57eaae4767de056cc44daaf..96e32b6bfc6b3856786e390f912af3e37eca427c 100644 --- a/uk/ac/sanger/artemis/components/alignment/MappedReads.java +++ b/uk/ac/sanger/artemis/components/alignment/MappedReads.java @@ -17,10 +17,20 @@ import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.JProgressBar; +import uk.ac.sanger.artemis.Entry; +import uk.ac.sanger.artemis.EntryGroup; import uk.ac.sanger.artemis.Feature; import uk.ac.sanger.artemis.FeatureVector; +import uk.ac.sanger.artemis.components.FeatureDisplay; import uk.ac.sanger.artemis.components.FileViewer; import uk.ac.sanger.artemis.components.SwingWorker; +import uk.ac.sanger.artemis.io.EntryInformationException; +import uk.ac.sanger.artemis.io.Key; +import uk.ac.sanger.artemis.io.Range; +import uk.ac.sanger.artemis.sequence.Bases; +import uk.ac.sanger.artemis.sequence.MarkerRange; +import uk.ac.sanger.artemis.util.OutOfRangeException; +import uk.ac.sanger.artemis.util.ReadOnlyException; import net.sf.samtools.SAMFileReader; @@ -167,6 +177,68 @@ public class MappedReads cmr.start(); dialog.setVisible(true); } + + + + /** + * Search for new features based on a threshold of read counts in the intergenic + * and anti-sense regions of existing annotations. This should exclude rRNA and + * tRNA regions. If two or more BAM files are loaded it should create features + * based on the combined read peak span. + * @param feature_display + * @param refName + * @param samFileReaderHash + * @param bamList + * @param seqNames + * @param offsetLengths + * @param concatSequences + * @param seqLengths + * @param samRecordFlagPredicate + * @param samRecordMapQPredicate + * @param contained + */ + public MappedReads( + final FeatureDisplay feature_display, + final String refName, + final Hashtable<String, SAMFileReader> samFileReaderHash, + final List<String> bamList, + final Vector<String> seqNames, + final HashMap<String, Integer> offsetLengths, + final boolean concatSequences, + final HashMap<String, Integer> seqLengths, + final SAMRecordPredicate samRecordFlagPredicate, + final SAMRecordMapQPredicate samRecordMapQPredicate, + final boolean contained) + { + this.refName = refName; + this.samFileReaderHash = samFileReaderHash; + this.bamList = bamList; + this.seqNames = seqNames; + this.offsetLengths = offsetLengths; + this.concatSequences = concatSequences; + this.seqLengths = seqLengths; + this.samRecordFlagPredicate = samRecordFlagPredicate; + this.samRecordMapQPredicate = samRecordMapQPredicate; + this.contained = contained; + + progressBar = new JProgressBar(0, feature_display.getSequenceLength()); + progressBar.setValue(0); + progressBar.setStringPainted(true); + + JPanel panel = new JPanel(new BorderLayout()); + progressTxt.setText("Search for new features"); + panel.add(progressTxt, BorderLayout.NORTH); + panel.add(progressBar, BorderLayout.CENTER); + + panel.setOpaque(true); + dialog.setContentPane(panel); + dialog.pack(); + centerDialog(); + + CalculateNewFeatures cmr = new CalculateNewFeatures(feature_display, refName); + cmr.start(); + dialog.setVisible(true); + } private void centerDialog() { @@ -349,4 +421,246 @@ public class MappedReads } } } + + class CalculateNewFeatures extends SwingWorker + { + private EntryGroup entryGroup; + private Bases bases; + private String refSeq; + + CalculateNewFeatures(final FeatureDisplay feature_display, final String refSeq) + { + entryGroup = feature_display.getEntryGroup(); + bases = feature_display.getBases(); + this.refSeq = refSeq; + } + + public Object construct() + { + int MAX_BASE_CHUNK = 2000 * 60; + + final int threshold = 4; + final int minSize = 10; + + Key excluseKeys[] = { new Key("rRNA"), new Key("tRNA") }; + int seqlen = bases.getLength(); + + final int beg = 1; + final int end = seqlen; + + int offset = 0; + + int fwdStart = -1; + int revStart = -1; + final List<MarkerRange> fwdMarkers = new Vector<MarkerRange>(); + final List<MarkerRange> revMarkers = new Vector<MarkerRange>(); + for (int i = 0; i < bamList.size(); i++) + { + for(int j=beg; j<end; j+=MAX_BASE_CHUNK) + { + progressBar.setValue((j + (i*end)) / bamList.size()); + if(j > end) + continue; + int start = j; + int stop = j+MAX_BASE_CHUNK; + + try + { + int cnt[][] = null; + if (concatSequences) + { + for (String name : seqNames) + { + int len = seqLengths.get(name); + offset = offsetLengths.get(name); + + if( (start >= offset && start <= offset+len) || + (stop >= offset && start <= offset+len) ) + { + cnt = + BamUtils.countOverRange(bamList.get(i), samFileReaderHash, + name, start-offset, stop-offset, + samRecordFlagPredicate, samRecordMapQPredicate); + } + } + } + else + { + cnt = + BamUtils.countOverRange(bamList.get(i), samFileReaderHash, + refSeq, start, stop, + samRecordFlagPredicate, samRecordMapQPredicate); + } + + for(int k=0; k<cnt.length; k++) + { + final Range r = new Range(start+k, start+k+1); + + fwdStart = findFeatures(cnt[k][0], true, threshold, fwdStart, j+k, + r, minSize, excluseKeys, fwdMarkers, entryGroup); + + revStart = findFeatures(cnt[k][1], false, threshold, revStart, j+k, + r, minSize, excluseKeys, revMarkers, entryGroup); + } + + } + catch (OutOfRangeException e1) + { + e1.printStackTrace(); + } + } + } + + System.out.println("No. of features="+fwdMarkers.size()); + final Entry newEntry = entryGroup.createEntry ("BAM_CDS"); + createFeatures(fwdMarkers, true, newEntry); + createFeatures(revMarkers, false, newEntry); + return null; + } + + + private void createFeatures(final List<MarkerRange> markers, final boolean isFwd, final Entry newEntry) + { + // merge overlapping markers + List<MarkerRange> featureMarkers = new Vector<MarkerRange>(); + List<Integer> ignoreMarkers = new Vector<Integer>(); + for(int i=0; i<markers.size(); i++) + { + if(ignoreMarkers.contains(i)) + continue; + MarkerRange mi = markers.get(i); + for(int j=i+1; j<markers.size(); j++) + { + if(ignoreMarkers.contains(j)) + continue; + MarkerRange mj = markers.get(j); + if(mi.overlaps(mj)) + { + mi = mi.combineRanges(mj, false); + ignoreMarkers.add(j); + } + } + featureMarkers.add(mi); + } + + for(MarkerRange r: featureMarkers) + { + try + { + newEntry.createFeature(Key.CDS, + ( isFwd ? r.createLocation() : r.createLocation().getComplement()), null); + } + catch (ReadOnlyException e1) + { + e1.printStackTrace(); + } + catch (EntryInformationException e1) + { + e1.printStackTrace(); + } + catch (OutOfRangeException e1) + { + e1.printStackTrace(); + } + } + } + + /** + * Search for new features based on read counts. + * @param cnt read count + * @param isFwd strand to find features on is the forward strand if true + * @param threshold read count minimum threshold + * @param fStart start of a new feature or -1 if not identified yet + * @param pos current base position + * @param range current base range + * @param minSize minimum feature size + * @param excluseKeys feature keys of regions to avoid looking in + * @param markers list of new features + * @param entryGroup entry group + * @return + */ + private int findFeatures(final int cnt, + final boolean isFwd, + final int threshold, + int fStart, + final int pos, + final Range range, + final int minSize, + final Key excluseKeys[], + final List<MarkerRange> markers, + final EntryGroup entryGroup) + { + if(cnt > threshold && fStart == -1) // START FEATURE + { + boolean exclude = false; + try + { + final FeatureVector features = + entryGroup.getFeaturesInRange(range); + for(int k=0; k<features.size(); k++) + { + Feature f = features.elementAt(k); + if( f.isProteinFeature() && ! (isFwd ^ f.isForwardFeature()) ) // on same strand + return fStart; + + for(int l=0; l<excluseKeys.length; l++) + if(f.getKey().equals(excluseKeys[l])) + exclude = true; + } + } + catch (OutOfRangeException e1){ } + + if(!exclude) + fStart = range.getStart(); + } + else if(cnt < threshold && fStart != -1) + { + try + { + if((range.getStart()-fStart) >= minSize) + { + final MarkerRange marker = new MarkerRange( + bases.getForwardStrand(), fStart, range.getStart()); + markers.add( marker ); + } + } + catch (OutOfRangeException e1){} + + return -1; + } + else if (fStart != -1) + { + try + { + FeatureVector features = entryGroup.getFeaturesInRange(range); + for(int k=0; k<features.size(); k++) + { + Feature f = features.elementAt(k); + if( f.isProteinFeature() && ! (isFwd ^ f.isForwardFeature()) ) // on same strand + { + if((f.getFirstBase()-fStart) >= minSize) + { + final MarkerRange marker = new MarkerRange( + bases.getForwardStrand(), fStart, f.getRawFirstBase() ); + markers.add( marker ); + } + return -1; + } + } + } + catch (OutOfRangeException e) + { + e.printStackTrace(); + } + + } + return fStart; + } + + + public void finished() + { + dialog.dispose(); + } + } }