Skip to content
Snippets Groups Projects
IndexFastaStream.java 9.3 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    /* 
     * created: 2010
     *
     * This file is part of Artemis
     *
     * Copyright(C) 2010  Genome Research Limited
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License
     * as published by the Free Software Foundation; either version 2
     * of the License, or(at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     *
     */
    package uk.ac.sanger.artemis.io;
    
    import java.io.File;
    import java.io.IOException;
    import java.io.Writer;
    import java.util.Iterator;
    
    tcarver's avatar
    tcarver committed
    import java.util.Vector;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
    import javax.swing.JOptionPane;
    
    
    tjc's avatar
    tjc committed
    import net.sf.picard.reference.FastaSequenceIndex;
    import net.sf.picard.reference.IndexedFastaSequenceFile;
    import net.sf.picard.reference.ReferenceSequence;
    
    tjc's avatar
    tjc committed
    import net.sf.picard.reference.ReferenceSequenceFileFactory;
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.io.Entry;
    import uk.ac.sanger.artemis.Options;
    import uk.ac.sanger.artemis.components.EntryFileDialog;
    
    import uk.ac.sanger.artemis.util.CacheHashMap;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.util.FileDocument;
    
    import uk.ac.sanger.artemis.util.ReadOnlyException;
    
    tjc's avatar
    tjc committed
    import uk.ac.sanger.artemis.util.URLDocument;
    
    tjc's avatar
    tjc committed
    
    public class IndexFastaStream extends StreamSequence 
    {
      private IndexedFastaSequenceFile indexSeqFile;
      private FastaSequenceIndex fastaIndex;
      private int len;
      private String contig;
    
      private CacheHashMap basesCache;  // used by charAt()
    
    tjc's avatar
    tjc committed
      
      public IndexFastaStream(Entry entry)
      {
    
    tjc's avatar
    tjc committed
        DocumentEntry doc = (DocumentEntry)entry;
        if(doc instanceof URLDocument)
        {
          //URL url = (URL)((URLDocument)doc).getLocation();
          // not supported yet
        }
        else
        {
          File fasta = ((FileDocument)doc.getDocument()).getFile();
    
    tjc's avatar
    tjc committed
          File parentDir = fasta.getParentFile();
          File fastaIndexFile;
          if(parentDir != null)
            fastaIndexFile = new File(parentDir.getAbsolutePath(), fasta.getName() + ".fai");
          else
            fastaIndexFile = new File(fasta.getName() + ".fai");
          
    
    tjc's avatar
    tjc committed
          fastaIndex = new FastaSequenceIndex(fastaIndexFile);
    
    tjc's avatar
    tjc committed
          
          try
          {
            indexSeqFile = new IndexedFastaSequenceFile(fasta, fastaIndex);
          }
          catch(IllegalArgumentException ie)
          {
            JOptionPane.showConfirmDialog(null, 
                "Expecting fasta extensions:\n"+
                ReferenceSequenceFileFactory.FASTA_EXTENSIONS.toString()+
                "\n"+ie.getMessage(), 
                "Error", JOptionPane.ERROR_MESSAGE);
          }
    
    tjc's avatar
    tjc committed
        }
        
    
    tjc's avatar
    tjc committed
        setContigByIndex(0);
      }
      
      /**
       * Test if the entry contains an indexed sequence
       * @param entry
       * @return
       */
    
    tcarver's avatar
    tcarver committed
      protected static boolean isIndexed(Entry entry)
    
    tjc's avatar
    tjc committed
      {
    
    tjc's avatar
    tjc committed
        try
    
    tjc's avatar
    tjc committed
        {
    
    tjc's avatar
    tjc committed
          if (entry instanceof DocumentEntry
              && ((DocumentEntry) entry).getDocument() instanceof FileDocument)
          {
            File fasta = ((FileDocument) ((DocumentEntry) entry).getDocument()).getFile();
    
            File parentDir = fasta.getParentFile();
            File fastaIndexFile;
            if(parentDir != null)
              fastaIndexFile = new File(parentDir.getAbsolutePath(), fasta.getName() + ".fai");
            else
              fastaIndexFile = new File(fasta.getName() + ".fai");
    
    tjc's avatar
    tjc committed
            if (fastaIndexFile.exists())
    
            {
              try
              {
                setExtensions();
              }
              catch(UnsupportedClassVersionError e)
              {
                System.err.println("Java version "+System.getProperty("java.version")+
                    " does not support indexed fasta - use Java 1.6 or higher.");
                return false;
              }
    
    tjc's avatar
    tjc committed
              return true;
    
    tjc's avatar
    tjc committed
          }
        }
        catch(Exception e)
        {
          e.printStackTrace();
    
    tjc's avatar
    tjc committed
        }
        return false;
      }
      
    
    tcarver's avatar
    tcarver committed
      protected boolean useIndex()
      {
        if(fastaIndex.size() == 1)
          return true;
    
        Object[] possibleValues = { "Use index", "Concatenate Sequences" };
        int sel = JOptionPane.showOptionDialog(null, 
            "Use the FASTA index file (to increase the performance)\n"+ 
            "or concatenate the sequences together?",
            "Indexed FASTA Found", 
            JOptionPane.DEFAULT_OPTION, JOptionPane.WARNING_MESSAGE,
            null, possibleValues, possibleValues[0]);
       
        return sel == 0;
      }
      
    
      /**
       * Add to supported FASTA allowed suffixes.
       */
      private static void setExtensions()
      {
        if(ReferenceSequenceFileFactory.FASTA_EXTENSIONS.contains(".dna"))
          return;
        ReferenceSequenceFileFactory.FASTA_EXTENSIONS.add(".dna");
        ReferenceSequenceFileFactory.FASTA_EXTENSIONS.add(".seq");
        ReferenceSequenceFileFactory.FASTA_EXTENSIONS.add(".fas");
        ReferenceSequenceFileFactory.FASTA_EXTENSIONS.add(".ffn");
      }
      
    
    tjc's avatar
    tjc committed
      public void setContigByIndex(int seqIndex) 
      {
        /*ReferenceSequence ref = getReferenceSequence(seqIndex);
        len = ref.length();
        contig = ref.getName();*/
        
        len = getLengthByIndex(seqIndex);
        contig = getContigByIndex(seqIndex);
      }
    
      /**
       *  Return a the given range of bases as a String.  Returns an empty
       *  sequence if the end position is less than the start position.
       *  @param start The start base of the range.
       *  @param end The end base of the range.
       **/
      public String getSubSequence(int start, int end) 
      {
    
    tjc's avatar
    tjc committed
        byte b[] = indexSeqFile.getSubsequenceAt(contig, start, end).getBases();
        return new String(b).toLowerCase();
    
    tjc's avatar
    tjc committed
      }
      
      public char[] getCharSubSequence(int start, int end) 
      {
        return getSubSequence(start, end).toCharArray();
      }
      
    
      /**
       * Used by AddMenu.markAmbiguities() to retrieve the sequence character
       * at a specified position
       */
      public char charAt(final int i)
      {
        if(basesCache == null)
          basesCache = new CacheHashMap(250,50);
          
        if(!basesCache.containsKey(i))
        {
          int end = i+249;
          if(end > length())
            end = length();
          String seq = getSubSequence(i, end);
          for(int idx=0; idx<seq.length(); idx++)
            basesCache.put(i+idx, seq.charAt(idx));
        }
        return (Character)basesCache.get(i);
      }
      
    
    tjc's avatar
    tjc committed
      private int getLengthByIndex(int seqIndex)
      {
        Iterator it = fastaIndex.iterator();
        int i = 0;
        while(it.hasNext())
        {
          Object obj = it.next();
          if(i == seqIndex)
          {
    
            String parts[] = obj.toString().split(";");
            for(String part: parts)
            {
              if(part.trim().startsWith("size"))
                return Integer.parseInt(part.substring(5).trim());
            }
    
    tjc's avatar
    tjc committed
          }
          i++;
        }
        return -1;
      }
      
      private String getContigByIndex(int seqIndex)
      {
        Iterator it = fastaIndex.iterator();
        int i = 0;
        while(it.hasNext())
        {
          Object obj = it.next();
          if(i == seqIndex)
    
          {
            String c = obj.toString().split(" ")[1];
            if(c.endsWith(";"))
              c = c.substring(0, c.length()-1);
            return c;
          }
    
    tjc's avatar
    tjc committed
          i++;
        }
        return null;
      }
      
    
    tcarver's avatar
    tcarver committed
      public Vector<String> getContigs()
      {
        Iterator it = fastaIndex.iterator();
        Vector<String> contigs = new Vector<String>();
        while(it.hasNext())
        {
          String contig = it.next().toString().split(";")[0];
          if(contig.startsWith("contig "))
            contig = contig.substring(6).trim();
          contigs.add(  contig );
        }
        return contigs;
      }
      
    
    tjc's avatar
    tjc committed
      public ReferenceSequence getReferenceSequence(int seqIndex)
      {
        int i = 0;
        ReferenceSequence ref;
        indexSeqFile.reset();
        while( (ref=indexSeqFile.nextSequence()) != null ) 
        {  
          if(i == seqIndex)
            return ref;
           i++;
        }
        return null;
      }
      
    
      /**
       *  Returns the length of the sequence in bases.
       **/
      public int length() 
      {
        return len;
      }
      
      @Override
      public StreamSequence copy()
      {
        // TODO Auto-generated method stub
        return null;
      }
    
      @Override
      public int getFormatType()
      {
        return StreamSequenceFactory.INDEXED_FASTA_FORMAT;
      }
    
      
      public void setFromChar(final char dna[])
      {
        JOptionPane.showMessageDialog(null,"Read only sequence.", 
            "Warning", JOptionPane.WARNING_MESSAGE);
        throw new RuntimeException(new ReadOnlyException());
      }
    
    tjc's avatar
    tjc committed
    
      @Override
      public void writeToStream(Writer writer) throws IOException
      {
        // TODO Auto-generated method stub
      }
      
      public IndexedFastaSequenceFile getIndexSeqFile()
      {
        return indexSeqFile;
      }
      
      public FastaSequenceIndex getFastaIndex()
      {
        return fastaIndex;
      }
    
    tjc's avatar
    tjc committed
      
    
    tcarver's avatar
    tcarver committed
      public String getContig()
      {
        return contig;
      }
      
    
    tjc's avatar
    tjc committed
      public static void main(String args[])
      {
        EntryInformation new_entry_information =
          new SimpleEntryInformation(Options.getArtemisEntryInformation());
    
        try
        {
          Entry emblEntry = null;
          if(args[0].startsWith("http:"))
          {
            
            
          }
          else
          {
            final uk.ac.sanger.artemis.Entry entry =  
              new uk.ac.sanger.artemis.Entry(EntryFileDialog.getEntryFromFile(
                        null, new FileDocument(new File(args[0])),
                        new_entry_information, true));
            emblEntry = entry.getEMBLEntry();
          }
          
          IndexFastaStream istream = new IndexFastaStream(emblEntry);
          System.out.println(istream.getCharSubSequence(1, 8000));
        }
        catch (Exception e)
        {
          e.printStackTrace();
        }
      }
    
    tjc's avatar
    tjc committed
    }