Skip to content
Snippets Groups Projects
BamUtils.java 11.4 KiB
Newer Older
/* BamUtils
 *
 * created: 2011
 *
 * This file is part of Artemis
 *
 * Copyright(C) 2011 Genome Research Limited
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or(at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 */
package uk.ac.sanger.artemis.components.alignment;

tcarver's avatar
tcarver committed
import java.util.HashMap;
import java.util.Hashtable;
tcarver's avatar
tcarver committed
import java.util.List;
import java.util.Vector;

import javax.swing.JProgressBar;

tcarver's avatar
tcarver committed
import net.sf.samtools.AlignmentBlock;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import uk.ac.sanger.artemis.Feature;
tjc's avatar
tjc committed
import uk.ac.sanger.artemis.FeatureSegmentVector;
import uk.ac.sanger.artemis.FeatureVector;
tjc's avatar
tjc committed
import uk.ac.sanger.artemis.io.Range;
tjc's avatar
tjc committed

  protected static float getFeatureLength(Feature f)
tjc's avatar
tjc committed
  {
    FeatureSegmentVector segs = f.getSegments();
    int len = 0;
    for(int i=0; i<segs.size(); i++)
    {
      Range r = segs.elementAt(i).getRawRange();
tjc's avatar
tjc committed
      len += r.getEnd()-r.getStart()+1;
tjc's avatar
tjc committed
    }
tjc's avatar
tjc committed
    return (float)len;
  /**
   * Count the reads in a range.
   * @param start
   * @param end
   * @param bam
   * @param refName
   * @param samFileReaderHash
   * @param seqNames
   * @param offsetLengths
   * @param concatSequences
   * @param seqLengths
   * @param samRecordFlagPredicate
   * @param samRecordMapQPredicate
   * @param contained
   * @param useStrandTag - strand specific tag
  protected static float[] getCount(
      final BamView bamView,
      final int start,
      final int end,
      final String bam,
      final boolean contained,
      final boolean useStrandTag)
    final Vector<String> seqNames = bamView.getSeqNames();
    final HashMap<String, Integer> offsetLengths = bamView.getOffsetLengths();
    final HashMap<String, Integer> seqLengths = bamView.getSeqLengths();

    int cnt[] = new int[2];
    cnt[0] = 0;
    cnt[1] = 0;
    if(bamView.isConcatSequences())
    {
      int len = 0;
      int lastLen = 1;
      for(String name : seqNames)
      {
        int thisLength = seqLengths.get(name);
        len += thisLength;

        if( (lastLen >= start && lastLen < end) ||
            (len >= start && len < end) ||
            (start >= lastLen && start < len) ||
tcarver's avatar
tcarver committed
            (end > lastLen && end < len) )
        {
          int offset = offsetLengths.get(name); 
          int thisStart = start - offset;
          if(thisStart < 1)
            thisStart = 1;
          int thisEnd   = end - offset;
          if(thisEnd > thisLength)
            thisEnd = thisLength;

          cnt = count(bamView, bam, thisStart, thisEnd, contained, true, useStrandTag);
      cnt = count(bamView, bam, start, end, contained, true, useStrandTag);
    
    float cntf[] = new float[2];
    cntf[0] = cnt[0];
    cntf[1] = cnt[1];
    return cntf;
  protected static int[] count(
          final BamView bamView,
          final String bam, 
          final int start,
          final int end,
          final boolean contained,
          final boolean byStrand,
          final boolean useStrandTag)
    final String refName = (String) bamView.getCombo().getSelectedItem();
    final Hashtable<String, SAMFileReader> samFileReaderHash = bamView.getSamFileReaderHash();
    final SAMRecordPredicate samRecordFlagPredicate = bamView.getSamRecordFlagPredicate();
    final SAMRecordPredicate samRecordMapQPredicate = bamView.getSamRecordMapQPredicate();

    int cnt[] = new int[2];
    cnt[0] = 0;
    cnt[1] = 0;
    
    SAMFileReader inputSam = samFileReaderHash.get(bam);
    final CloseableIterator<SAMRecord> it = inputSam.query(refName, start, end, contained);
tjc's avatar
tjc committed

    while ( it.hasNext() )
    {
      SAMRecord samRecord = it.next();
      if( samRecordFlagPredicate == null ||
          !samRecordFlagPredicate.testPredicate(samRecord))
       {
         if(samRecordMapQPredicate == null ||
            samRecordMapQPredicate.testPredicate(samRecord))
         {
           if(byStrand && BamView.isNegativeStrand(samRecord, useStrandTag))
             cnt[1]++;
           else
             cnt[0]++;
  
  protected static int[] calc(
      final BamView bamView, 
      final String refName, 
      final int sequenceLength,
      final boolean useStrandTag,
      final JProgressBar progressBar)
  {
    int mappedReads[] = new int[bamView.bamList.size()];
    int MAX_BASE_CHUNK = 2000 * 60;
    boolean contained = false;
    for (int i = 0; i < sequenceLength; i += MAX_BASE_CHUNK)
    {
      if(progressBar != null)
        progressBar.setValue(i);
      int sbegc = i;
      int sendc = i + MAX_BASE_CHUNK - 1;

      for (int j = 0; j < bamView.bamList.size(); j++)
      {
        String bam = bamView.bamList.get(j);
        if (bamView.isConcatSequences())
        {
          int len = 0;
          int lastLen = 1;
          for (String name : bamView.getSeqNames())
          {
            int thisLength = bamView.getSeqLengths().get(name);
            len += thisLength;

            if ((lastLen >= sbegc && lastLen < sendc)
                || (len >= sbegc && len < sendc)
                || (sbegc >= lastLen && sbegc < len)
                || (sendc >= lastLen && sendc < len))
            {
              int offset = bamView.getOffsetLengths().get(name);
              int thisStart = sbegc - offset;
              if (thisStart < 1)
                thisStart = 1;
              int thisEnd = sendc - offset;
              if (thisEnd > thisLength)
                thisEnd = thisLength;

              mappedReads[j] += BamUtils.count(bamView, bam, thisStart, thisEnd, 
                  contained, false, useStrandTag)[0];
            }
            lastLen = len;
          }
        }
        else
        {
          mappedReads[j] += BamUtils.count(bamView, bam, sbegc, sendc,
              contained, false, useStrandTag)[0];
        }
      }
    }
    return mappedReads;
  }
tcarver's avatar
tcarver committed
  /**
   * Return the coverage for each base in a range for the forward and
   * reverse strand.
   * @param bamView
tcarver's avatar
tcarver committed
   * @param bamFile
   * @param start
   * @param end
   * @param concatShift
   * @param cnt
tcarver's avatar
tcarver committed
   * @return
   */
  protected static int[][] countOverRange(
      final BamView bamView,
      final String bamFile, 
      final int start, 
      final int end, 
      final int concatShift, 
      final int cnt[][])
    final String refName = (String) bamView.getCombo().getSelectedItem();
    final Hashtable<String, SAMFileReader> samFileReaderHash = bamView.getSamFileReaderHash();
    final SAMRecordPredicate samRecordFlagPredicate = bamView.getSamRecordFlagPredicate();
    final SAMRecordPredicate samRecordMapQPredicate = bamView.getSamRecordMapQPredicate();

tcarver's avatar
tcarver committed
    SAMFileReader inputSam = samFileReaderHash.get(bamFile);
    final CloseableIterator<SAMRecord> it = 
        inputSam.query(refName, start, end, false);

    while (it.hasNext())
    {
      SAMRecord samRecord = it.next();
      if (samRecordFlagPredicate == null
          || !samRecordFlagPredicate.testPredicate(samRecord))
      {
        if (samRecordMapQPredicate == null
            || samRecordMapQPredicate.testPredicate(samRecord))
        {
          List<AlignmentBlock> blocks = samRecord.getAlignmentBlocks();
          boolean isFwd = !samRecord.getReadNegativeStrandFlag();
          
          for(int j=0; j<blocks.size(); j++)
          {
            AlignmentBlock block = blocks.get(j);
            int refStart = block.getReferenceStart();
            for(int i=0; i<block.getLength(); i++)
            {
              int pos = refStart + i + concatShift;
tcarver's avatar
tcarver committed
              int bin = pos - start;
              if(bin < 0 || bin > cnt.length-1)
                continue;
              
              if(isFwd)
                cnt[bin][0]++;
              else
                cnt[bin][1]++;
            } 
          }
        }
      }
    }
    it.close();
    return cnt;
  }

  /**
   * For a list of features calculate the read count for each
   * @param bamView
   * @param features
   * @param contained
   * @param useIntrons
   * @param useStrandTag
   * @param mappedReads
   * @param progressBar
   * @return
   */
  protected static Hashtable<String, List<ReadCount>> calculateMappedReads(
      final BamView bamView,
      final FeatureVector features,
      final boolean contained, 
      final boolean useIntrons,
      final boolean useStrandTag,
      final int mappedReads[],
      final JProgressBar progressBar)
  {
    final Hashtable<String, List<ReadCount>> featureReadCount = 
        new Hashtable<String, List<ReadCount>>();
    for (int i = 0; i < features.size(); i++)
    {
      final Feature f = features.elementAt(i);
      if(progressBar != null)
        progressBar.setValue(i);

      int start = f.getRawFirstBase();
      int end = f.getRawLastBase();
      final float fLen = BamUtils.getFeatureLength(f);
      List<ReadCount> sampleCounts = new Vector<ReadCount>();

      for (int j = 0; j < bamView.bamList.size(); j++)
      {
        final String bam = bamView.bamList.get(j);
        float cnt[] = new float[2];

        cnt = BamUtils.getCount(bamView, start, end, bam, contained, useStrandTag);
        if (!useIntrons && f.getSegments().size() > 1)
        {
          // remove reads contained by intron
          for (int k = 0; k < f.getSegments().size()-1; k++)
          {
            int seg = k;
            int nextSeg = k+1;
            if(!f.isForwardFeature())
            {
              seg = f.getSegments().size()-k-1;
              nextSeg = seg-1;
            }

            start = f.getSegments().elementAt(seg).getRawRange().getEnd();
            end = f.getSegments().elementAt(nextSeg).getRawRange().getStart();

            float tmpcnt[] = new float[2];
            tmpcnt = BamUtils.getCount(bamView, start, end, bam, true, useStrandTag);
            cnt[0] -= tmpcnt[0];
            cnt[1] -= tmpcnt[1];
          }
        }
        
        if (mappedReads != null)
        {
          cnt[0] = (cnt[0] / (((float) mappedReads[j] / 1000000.f) * (fLen / 1000.f)));
          cnt[1] = (cnt[1] / (((float) mappedReads[j] / 1000000.f) * (fLen / 1000.f)));
        }

        sampleCounts.add( new ReadCount(cnt, f.isForwardFeature()) );
      }
      featureReadCount.put(ReadCountDialog.getFeatureName(f), sampleCounts);
    }
    return featureReadCount;
  }
  
  /**
   * Check whether two SAM records are the same.
   * @param bamRec1 SamViewRecord
   * @param bamRec2 SamViewRecord
   * @return boolean - true if equality holds.
   */
  public static boolean samRecordEqualityCheck(SAMRecord rec1, SAMRecord rec2)
  {
	  boolean result = false;
	  
	  if (rec1 == null && rec2 == null)
		  return true;
	  
	  result = (
		(rec1.getReadName().equals(rec2.getReadName())) &&
		(rec1.getAlignmentStart() == rec2.getAlignmentStart()) &&
		(rec1.getAlignmentEnd() == rec2.getAlignmentEnd()) &&
		(rec1.getFlags() == rec2.getFlags())
	  );
	  
	  return result;
  }