Skip to content
Snippets Groups Projects
IOUtils.java 20 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* IOUtils
     *
     * created: 2010
     *
     * This file is part of Artemis
     *
     * Copyright(C) 2010  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or(at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
     */
    package uk.ac.sanger.artemis.components.variant;
    
    
    tjc's avatar
    tjc committed
    import java.io.FileInputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    
    tjc's avatar
    tjc committed
    import java.io.InputStream;
    import java.net.URL;
    
    tjc's avatar
    tjc committed
    import java.util.List;
    
    
    import javax.swing.JFileChooser;
    
    tjc's avatar
    tjc committed
    import javax.swing.JOptionPane;
    
    import uk.ac.sanger.artemis.Entry;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.EntryGroup;
    import uk.ac.sanger.artemis.Feature;
    import uk.ac.sanger.artemis.FeatureEnumeration;
    import uk.ac.sanger.artemis.FeatureKeyQualifierPredicate;
    
    import uk.ac.sanger.artemis.FeaturePredicate;
    import uk.ac.sanger.artemis.FeatureSegment;
    import uk.ac.sanger.artemis.FeatureSegmentVector;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.FeatureVector;
    
    import uk.ac.sanger.artemis.Selection;
    
    import uk.ac.sanger.artemis.components.FileViewer;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.components.MessageDialog;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.components.SequenceViewer;
    
    import uk.ac.sanger.artemis.components.StickyFileChooser;
    
    import uk.ac.sanger.artemis.components.variant.BCFReader.BCFReaderIterator;
    
    import uk.ac.sanger.artemis.io.DocumentEntry;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.io.Key;
    
    import uk.ac.sanger.artemis.io.Range;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.sequence.Bases;
    
    import uk.ac.sanger.artemis.sequence.MarkerRange;
    import uk.ac.sanger.artemis.util.DatabaseDocument;
    import uk.ac.sanger.artemis.util.FileDocument;
    import uk.ac.sanger.artemis.util.RemoteFileDocument;
    
    tjc's avatar
    tjc committed
    
    import net.sf.samtools.util.BlockCompressedInputStream;
    
    class IOUtils
    
    tjc's avatar
    tjc committed
    { 
    
      
      private static final int MAXIMUM_SELECTED_FEATURES = 25;
      
    
    tjc's avatar
    tjc committed
      /**
       * Write filtered uncompressed VCF. Uses the filter in VCFview to
       * determine if variants are written.
       * @param vcfFileName
       * @param vcfView
       * @param features
       */
    
      protected static File writeVCF(final String vcfFileName, 
    
    tjc's avatar
    tjc committed
                                     final VCFview vcfView,
    
                                     final FeatureVector features,
                                     final int nfiles)
    
    tjc's avatar
    tjc committed
      {
        try
        {
    
    tjc's avatar
    tjc committed
          File filterFile = getFile(vcfFileName, nfiles, ".filter");
    
          FileWriter writer = new FileWriter(filterFile);
    
    tjc's avatar
    tjc committed
          if(IOUtils.isBCF(vcfFileName))
          {
            BCFReader.writeVCF(writer, vcfFileName);
    
    tjc's avatar
    tjc committed
          TabixReader tr = new TabixReader(vcfFileName);
          String line;
          while ((line = tr.readLine()) != null)
          {
            if(line.startsWith("#"))
    
    tjc's avatar
    tjc committed
            {
              writer.write(line+'\n');
    
    tjc's avatar
    tjc committed
              continue;
    
    tjc's avatar
    tjc committed
            }
    
    tjc's avatar
    tjc committed
            
            VCFRecord record = VCFRecord.parse(line);
    
    tjc's avatar
    tjc committed
            int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom());
    
            if( !vcfView.showVariant(record, features, basePosition, tr.isVcf_v4()) )
    
    tjc's avatar
    tjc committed
              continue;
            writer.write(line+'\n');
          }
          writer.close();
    
    tjc's avatar
    tjc committed
        }
        catch (IOException e)
        {
          e.printStackTrace();
    
    tjc's avatar
    tjc committed
        }
      }
      
    
      private static File getFile(final String vcfFileName, final int nfiles, final String suffix) throws IOException
    
          return new File(vcfFileName+suffix);
    
        final StickyFileChooser file_dialog = new StickyFileChooser();
    
    
        file_dialog.setSelectedFile(new File(vcfFileName+suffix));
    
        file_dialog.setDialogTitle("Choose save file ...");
        file_dialog.setDialogType(JFileChooser.SAVE_DIALOG);
        final int status = file_dialog.showSaveDialog(null);
    
        if(status != JFileChooser.APPROVE_OPTION ||
           file_dialog.getSelectedFile() == null) 
          return null;
        
        return file_dialog.getSelectedFile();
      }
      
    
    tjc's avatar
    tjc committed
      /**
       * Export as a VCF based on the filtering applied in the VCFview.
       * @param entryGroup
       * @param vcfFiles
       * @param vcfView
       */
    
    tjc's avatar
    tjc committed
      protected static void export(final EntryGroup entryGroup, 
                                   final List<String> vcfFiles,
                                   final VCFview vcfView)
      {
    
        // get all CDS features that do not have the /pseudo qualifier
        final FeatureVector features = getFeatures(
            new FeatureKeyQualifierPredicate(Key.CDS, "pseudo", false), entryGroup);
    
    tjc's avatar
    tjc committed
        
    
        String filterFiles = "";
        for(int i=0; i<vcfFiles.size(); i++)
        {
          File filterFile = IOUtils.writeVCF(vcfFiles.get(i), vcfView, features, vcfFiles.size());
          filterFiles += filterFile.getAbsolutePath()+"\n";
        }
    
        new MessageDialog (null, "Saved Files", filterFiles, false);
      }
    
    
    tjc's avatar
    tjc committed
       * Write out FASTA for a selected base range
    
       * @param entryGroup
       * @param vcfView
       * @param selection
       * @param view
       */
      protected static void exportFastaByRange(
                                        final EntryGroup entryGroup,
    
                                        final VCFview vcfView,
    
                                        final Selection selection,
                                        final boolean view)
    
    tjc's avatar
    tjc committed
        if(selection.getMarkerRange() == null)
        {
          JOptionPane.showMessageDialog(null, 
              "No base range selected.", 
              "Warning", JOptionPane.WARNING_MESSAGE);
          return;  
        }
    
    tjc's avatar
    tjc committed
    
    
        AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders();
    
        MarkerRange marker = selection.getMarkerRange();
        Range range = marker.getRawRange();
        int direction = ( marker.isForwardMarker() ? Bases.FORWARD : Bases.REVERSE);
        FeatureVector features = entryGroup.getAllFeatures();
        FileWriter writer = null;
        String fastaFiles = "";
    
    tjc's avatar
    tjc committed
    
    
        String name = entryGroup.getActiveEntries().elementAt(0).getName();
    
        int sbeg = range.getStart();
        int send = range.getEnd();
    
        StringBuffer buffSeq = null;
    
        try
        {
          if(!view)
          {
            File newfile = new File(
               getBaseDirectoryFromEntry(entryGroup.getActiveEntries().elementAt(0)),
                   name);
          
    
    tjc's avatar
    tjc committed
            File f = getFile(newfile.getAbsolutePath(), 1, ".fasta");
    
            if(f == null)
              return;
    
            writer = new FileWriter(f);
            fastaFiles += f.getAbsolutePath()+"\n";
          }
    
          else
            buffSeq = new StringBuffer();
    
    
          for (int i = 0; i < vcfReaders.length; i++)
          {
            String basesStr = entryGroup.getBases().getSubSequence(marker.getRange(), direction);
    
            basesStr = getAllBasesInRegion(vcfReaders[i], sbeg, send, basesStr,
                features, vcfView, marker.isForwardMarker());
    
              
            StringBuffer header = new StringBuffer(name+" ");
            header.append(sbeg+":"+send+ (marker.isForwardMarker() ? "" : " reverse"));
            header.append(" (").append(vcfReaders[i].getName()).append(")");
    
            if(view) // sequence viewer
            {
    
    tjc's avatar
    tjc committed
              buffSeq.append(">").append(header.toString()).append("\n");
              wrapString(basesStr, buffSeq);
    
              buffSeq.append("\n");
    
            }
            else    // write to file
              writeSequence(writer, header.toString(), basesStr);
          }
    
    tjc's avatar
    tjc committed
    
    
          if(writer != null)
            writer.close();
        }
        catch(IOException e)
        {
          e.printStackTrace();
        }
        
        if(!view)
          new MessageDialog (null, "Saved Files", fastaFiles, false);
    
        else
        {
          FileViewer viewer = new FileViewer ("Feature base viewer for selected range: " + 
              sbeg+":"+send+(marker.isForwardMarker() ? "" : " reverse"), true);
          viewer.getTextPane().setText(buffSeq.toString());
        }
    
    tjc's avatar
    tjc committed
      /**
       * Write the FASTA sequence out for the given features for each of the
       * VCF/BCF files.
       * @param vcfView
       * @param features
       * @param view
       */
    
      protected static void exportFasta(final VCFview vcfView,
    
    tjc's avatar
    tjc committed
                                        final FeatureVector features,
                                        final boolean view)
    
    tjc's avatar
    tjc committed
        if(features.size () < 1)
        {
          JOptionPane.showMessageDialog(null, 
              "No features selected.", 
              "Warning", JOptionPane.WARNING_MESSAGE);
          return;  
        }
    
        if(view && features.size () > MAXIMUM_SELECTED_FEATURES)
          new MessageDialog (null,
                            "warning: only viewing the sequences for " +
                            "the first " + MAXIMUM_SELECTED_FEATURES +
                            " selected features");
    
        String suffix = ".fasta";
        if(features.size() == 1)
          suffix = "."+features.elementAt(0).getIDString()+suffix;
    
        FileWriter writer = null;
    
        String fastaFiles = "";
    
        AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders();
    
          int opt = 0;
          if(vcfReaders.length > 1)
          {
            Object[] options = { "Single File", "Multiple Files" };
            opt = JOptionPane.showOptionDialog(null, "Write to :", 
                        "Output File",
                        JOptionPane.DEFAULT_OPTION, 
                        JOptionPane.QUESTION_MESSAGE, 
                        null, options, options[0]);
          }
          
          if(!view && opt == 0)
          {
            File f = getFile(vcfReaders[0].getFileName(), 1, suffix);
            if(f == null)
              return;
            writer = new FileWriter(f);
            fastaFiles += f.getAbsolutePath()+"\n";
          }
          
          for (int i = 0; i < vcfReaders.length; i++)
    
            if(!view && opt == 1)
    
              File f = getFile(vcfReaders[0].getFileName(), vcfReaders.length, suffix);
    
              writer = new FileWriter(f);
              fastaFiles += f.getAbsolutePath()+"\n";
            }
    
            
            for (int j = 0; j < features.size() && (!view || j < MAXIMUM_SELECTED_FEATURES); j++)
    
            {
              Feature f = features.elementAt(j);
              FeatureSegmentVector segs = f.getSegments();
              StringBuffer buff = new StringBuffer();
              for(int k=0; k<segs.size(); k++)
              {
                FeatureSegment seg = segs.elementAt(k);
                int sbeg = seg.getRawRange().getStart();
                int send = seg.getRawRange().getEnd();
    
                buff.append( getAllBasesInRegion(vcfReaders[i], sbeg, send, seg.getBases(),
    
                      features, vcfView, f.isForwardFeature()) );
    
    tjc's avatar
    tjc committed
    
    
              StringBuffer header = new StringBuffer(f.getSystematicName()).append(" ");
              header.append(f.getIDString()).append(" ");
    
              final String product = f.getProductString();
              header.append( (product == null ? "undefined product" : product) );
              header.append(" ").append(f.getWriteRange());
    
              header.append(" (").append(vcfReaders[i].getName()).append(")");
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
              if(view) // sequence viewer
              {
                SequenceViewer viewer =
    
                    new SequenceViewer ("Feature base viewer for feature:" + 
                      f.getIDString (), false);  
    
    tjc's avatar
    tjc committed
                viewer.setSequence(">"+header.toString(), buff.toString());
              }
              else    // write to file
                writeSequence(writer, header.toString(), buff.toString());
    
            if(writer != null && opt == 1)
    
              writer.close();
    
          
          if(writer != null && opt == 0)
            writer.close();
        } 
        catch(IOException e)
        {
          e.printStackTrace(); 
    
        
        if(!view )
          new MessageDialog (null, "Saved Files", fastaFiles, false);
    
      /**
       * For a given VCF file change the sequence in a range and return the
       * base sequence as a string.
       * @param reader
       * @param sbeg
       * @param send
       * @param basesStr
       * @param features
       * @param vcfView
       * @param isFwd
       * @return
       * @throws IOException
       */
      private static String getAllBasesInRegion(final AbstractVCFReader reader,
          final int sbeg,
          final int send,
          String basesStr,
          final FeatureVector features,
          final VCFview vcfView,
          final boolean isFwd) throws IOException
      {
        if(vcfView.isConcatenate())
        {
          String[] contigs = reader.getSeqNames();
          for(int j=0; j<contigs.length; j++)
          {
            int offset = vcfView.getSequenceOffset(contigs[j]);
            int nextOffset;
            if(j<contigs.length-1)
              nextOffset = vcfView.getSequenceOffset(contigs[j+1]);
            else
              nextOffset = vcfView.seqLength;
            
            if( (offset >= sbeg && offset < send) ||
                (offset < sbeg && sbeg < nextOffset) )
            {
              int thisStart = sbeg - offset;
              if(thisStart < 1)
                thisStart = 1;
              int thisEnd   = send - offset;
              basesStr = getBasesInRegion(reader, contigs[j], thisStart, thisEnd, 
                  basesStr, features, vcfView, isFwd);
            }
          }
        }
        else
          basesStr = getBasesInRegion(reader, vcfView.getChr(), sbeg, send, 
              basesStr, features, vcfView, isFwd);
        return basesStr;
      }
      
    
    tjc's avatar
    tjc committed
      /**
       * For a given VCF file change the sequence in a range and return the
       * base sequence as a string.
       * @param reader
       * @param chr
       * @param sbeg
       * @param send
       * @param basesStr
       * @param features
       * @param vcfView
       * @param isFwd
       * @return
       * @throws IOException
       */
      private static String getBasesInRegion(final AbstractVCFReader reader,
                                             final String chr,
                                             final int sbeg,
                                             final int send,
                                             String basesStr,
                                             final FeatureVector features,
                                             final VCFview vcfView,
    
                                             final boolean isFwd) throws IOException
    
    tjc's avatar
    tjc committed
      {
    
        boolean vcf_v4 = reader.isVcf_v4();
    
    tjc's avatar
    tjc committed
        if (reader instanceof BCFReader)
        {
          BCFReaderIterator it = ((BCFReader) reader).query(chr, sbeg, send);
          VCFRecord record;
          while ((record = it.next()) != null)
          {
            int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom());
    
            if(vcfView.showVariant(record, features, basePosition, vcf_v4) )
    
    tjc's avatar
    tjc committed
              basesStr = getSeqsVariation(record, basesStr, sbeg, isFwd, vcf_v4);
          }
        }
        else
        {
          TabixReader.Iterator iter = 
            (((TabixReader) reader).query(chr+":"+sbeg+"-"+send)); // get the iterator
          String s;
          while ((s = iter.next()) != null)
          {
            VCFRecord record = VCFRecord.parse(s);
            int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom());
    
            if(vcfView.showVariant(record, features, basePosition, vcf_v4) )
    
    tjc's avatar
    tjc committed
              basesStr = getSeqsVariation(record, basesStr, sbeg, isFwd, vcf_v4);
          }
        }
        return basesStr;
      }
      
    
    tjc's avatar
    tjc committed
      
      private static void wrapString(String bases, StringBuffer buff)
      {
        final int SEQUENCE_LINE_BASE_COUNT = 60;
        for(int k=0; k<bases.length(); k+=SEQUENCE_LINE_BASE_COUNT)
        {
          int end = k + SEQUENCE_LINE_BASE_COUNT;
          if(end > bases.length())
            end = bases.length();
          buff.append ( bases.substring(k,end) ).append("\n");
        }
      }
      
    
      private static void writeSequence(FileWriter writer, String header, String bases) throws IOException
    
        writer.write (">" + header + "\n");
    
        final int SEQUENCE_LINE_BASE_COUNT = 60;
        for(int k=0; k<bases.length(); k+=SEQUENCE_LINE_BASE_COUNT)
    
          int end = k + SEQUENCE_LINE_BASE_COUNT;
          if(end > bases.length())
            end = bases.length();
          writer.write ( bases.substring(k,end) + "\n");
    
      /**
       * Change the bases to reflect a variation record.
       * @param vcfRecord
       * @param bases
       * @param sbeg
       * @param isFwd
       * @param vcf_v4
       * @return
       */
    
      private static String getSeqsVariation(VCFRecord vcfRecord, 
    
          String bases, int sbeg, boolean isFwd, boolean vcf_v4)
    
        int position = vcfRecord.getPos()-sbeg;
        if(!isFwd)
    
    tjc's avatar
    tjc committed
          position = bases.length()-position-1;
    
        
        if(position > bases.length())
          return bases;
    
    tjc's avatar
    tjc committed
        else if(position < 0)
          return bases;
    
    tjc's avatar
    tjc committed
        if(position < bases.length()-1 && bases.charAt(position) == '-')
          return bases;
        
    
        StringBuffer buff = new StringBuffer();
    
    tjc's avatar
    tjc committed
        buff.append(bases.substring(0,position)); 
    
        
        if(vcfRecord.getAlt().isDeletion(vcf_v4))
        {
    
    tjc's avatar
    tjc committed
          int ndel = vcfRecord.getAlt().getNumberOfIndels(vcf_v4);
    
          if(!vcfRecord.getAlt().toString().equals(".") && isFwd)
          {
            buff.append(getBase(vcfRecord.getAlt().toString(), isFwd));
            position+=vcfRecord.getAlt().toString().length();
          }
    
    tjc's avatar
    tjc committed
          
          if(isFwd)
    
            position+=ndel-1;
    
    tjc's avatar
    tjc committed
          else
    
          {
            if(position-ndel+1 < 0)
              buff.delete(0, position);
            else   
              buff.delete(position-ndel+1, position);
          }
    
    tjc's avatar
    tjc committed
          
    
          for(int i=0; i<ndel; i++)
            buff.append("-");
        }
        else if(vcfRecord.getAlt().isInsertion(vcf_v4))
    
          buff.append(getBase(vcfRecord.getAlt().toString(), isFwd));
    
        else if(vcfRecord.getAlt().isMultiAllele())
    
    tjc's avatar
    tjc committed
          String base = MultipleAlleleVariant.getIUBCode(vcfRecord);
          if(base != null)
            buff.append(base);
          else
            buff.append(bases.charAt(position));
    
    tjc's avatar
    tjc committed
        else if(vcfRecord.getAlt().isNonVariant())                   // non-variant
    
          buff.append(getBase(vcfRecord.getRef(), isFwd).toUpperCase());
    
          buff.append(getBase(vcfRecord.getAlt().toString().toLowerCase(), isFwd));
    
        
        if(isFwd && position < bases.length())
          buff.append(bases.substring(position+1));
    
        else if(!isFwd && position < bases.length())
    
    tjc's avatar
    tjc committed
          buff.append(bases.substring(position+1));
    
        return buff.toString();
    
      
      /**
       * Get the actual bases by reverse complementing if on the
       * reverse strand.
       * @param baseStr
       * @param isFwd
       * @return
       */
      private static String getBase(String baseStr, boolean isFwd)
      {
        if(isFwd)
          return baseStr;
        return Bases.reverseComplement(baseStr);
      }
    
    
      /**
       * Get all features in an entry group.
       * @param predicate
       * @param entryGroup
       * @return
       */
      private static FeatureVector getFeatures(FeaturePredicate predicate, EntryGroup entryGroup)
      {
    
    tjc's avatar
    tjc committed
        final FeatureVector features = new FeatureVector ();
        final FeatureEnumeration feature_enum = entryGroup.features ();
        while (feature_enum.hasMoreFeatures ())
        {
          final Feature current_feature = feature_enum.nextFeature ();
          if (predicate.testPredicate (current_feature)) 
            features.add (current_feature);
        }
    
    tjc's avatar
    tjc committed
      }
    
    tjc's avatar
    tjc committed
    
      /**
       * Test if this is a BCF file.
       * @param fileName
       * @return
       * @throws IOException
       */
      protected static boolean isBCF(String fileName) throws IOException
      {
    
    tjc's avatar
    tjc committed
        InputStream ins;
        if(fileName.startsWith("http:"))
        {
          final URL urlBamIndexFile = new URL(fileName);
          ins = urlBamIndexFile.openStream();
        }
        else
          ins = new FileInputStream(fileName);
        BlockCompressedInputStream is = new BlockCompressedInputStream(ins);
    
    tjc's avatar
    tjc committed
        byte[] magic = new byte[4];
        is.read(magic);
    
    tjc's avatar
    tjc committed
        ins.close();
    
    tjc's avatar
    tjc committed
        is.close();
        String line = new String(magic);
        if(line.equals("BCF\4"))
          return true;
        return false;
      }
    
    tjc's avatar
    tjc committed
      
    
      /**
       *  Return the dirtectory that the given entry was read from.
       **/
      private static File getBaseDirectoryFromEntry(final Entry entry) 
      {
        final uk.ac.sanger.artemis.io.Entry embl_entry = entry.getEMBLEntry();
    
        if(embl_entry instanceof DocumentEntry) 
        {
          final DocumentEntry document_entry =(DocumentEntry) embl_entry;
    
          if(document_entry.getDocument() instanceof FileDocument) 
          {
            final FileDocument file_document =
             (FileDocument) document_entry.getDocument();
    
            if(file_document.getFile().getParent() != null) 
              return new File(file_document.getFile().getParent());
          }
        }
        if(((DocumentEntry)entry.getEMBLEntry()).getDocument()
           instanceof RemoteFileDocument ||
           ((DocumentEntry)entry.getEMBLEntry()).getDocument()
           instanceof DatabaseDocument)
          return new File(System.getProperty("user.dir"));
    
        return null;
      }
      
    
    tjc's avatar
    tjc committed
    }