From c55aeeb5b277bf975a54ad59fb3173f88581cdfd Mon Sep 17 00:00:00 2001
From: tjc <tjc@ee4ac58c-ac51-4696-9907-e4b3aa274f04>
Date: Fri, 3 Dec 2010 12:38:10 +0000
Subject: [PATCH] implement next()

git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@15158 ee4ac58c-ac51-4696-9907-e4b3aa274f04
---
 .../artemis/components/variant/BCFReader.java | 199 +++++++++++-------
 1 file changed, 121 insertions(+), 78 deletions(-)

diff --git a/uk/ac/sanger/artemis/components/variant/BCFReader.java b/uk/ac/sanger/artemis/components/variant/BCFReader.java
index c3f932f50..1c1f67e99 100644
--- a/uk/ac/sanger/artemis/components/variant/BCFReader.java
+++ b/uk/ac/sanger/artemis/components/variant/BCFReader.java
@@ -1,3 +1,26 @@
+/*
+ * created: 2010
+ *
+ * This file is part of Artemis
+ *
+ * Copyright(C) 2010  Genome Research Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or(at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ */
+
 package uk.ac.sanger.artemis.components.variant;
 
 import java.io.File;
@@ -16,60 +39,91 @@ import net.sf.samtools.util.BlockCompressedInputStream;
 class BCFReader
 {
   public static final int TAD_LIDX_SHIFT = 13; // linear index shift
-  private static Pattern formatPattern = Pattern.compile("[^0-9]+"); 
+  private static Pattern formatPattern = Pattern.compile("[^0-9]+");
+  private BlockCompressedInputStream is;
   
-  public void query(File bcf, long offset, int beg, int end) throws IOException
+  public void query(File bcf, long offset) throws IOException
   {
-    BlockCompressedInputStream is = new BlockCompressedInputStream(bcf);
+    is = new BlockCompressedInputStream(bcf);
     is.seek(offset);
-    
-    for(int i=0; i<7299; i++)
+  }
+  
+  
+  public VCFRecord next(int bid, int beg, int end) throws IOException
+  {
+    try
     {
-      int id = readInt(is);
-      int pos = readInt(is)+1;
-      float qual = readFloat(is);
-      int slen = readInt(is);
-      System.out.print("ID: "+id + " POS: "+pos + " QUAL: "+qual + " ");
-      byte[] str = new byte[slen];
-      is.read(str);
+      VCFRecord bcfRecord = readVCFRecord();
+      if(bcfRecord.pos >= beg && bcfRecord.pos <= end)
+        return bcfRecord;
+      else if(bcfRecord.pos < beg)
+      {
+        while( (bcfRecord = readVCFRecord()).pos <= beg )
+        {
+          if(bcfRecord.pos >= beg && bcfRecord.pos <= end)
+            return bcfRecord;
+        }
+      }
+    } 
+    catch(Exception e)
+    {
+      if(is.read() != -1)  // eof
+        e.printStackTrace();
+    }
+    
+    return null;
+  }
+  
+  private VCFRecord readVCFRecord() throws IOException
+  {
+    VCFRecord bcfRecord = new VCFRecord();
+    bcfRecord.seqID = readInt(is);    
+    bcfRecord.pos = readInt(is)+1;
+    bcfRecord.quality = readFloat(is);
+    
+    int slen = readInt(is);
+    byte[] str = new byte[slen];
+    is.read(str);
 
-      String parts[] = getParts(str);
+    String parts[] = getParts(str);
+    
+    bcfRecord.ref = parts[0];
+    bcfRecord.alt = parts[1];
+    String fmt = parts[parts.length-1];
+
+    if(formatPattern.matcher(fmt).matches())
+    {
+      bcfRecord.info = parts[parts.length-2];
+      bcfRecord.format = parts[parts.length-1];
       
-      String ref = parts[0];
-      String alt = parts[1];
-      String fmt = parts[parts.length-1];
+      int nc  = 3;
+      if(bcfRecord.alt.equals("."))
+        nc = 1;
 
-      if(formatPattern.matcher(fmt).matches())
+      String fmts[] = bcfRecord.format.split(":");
+      for(int j=0; j<fmts.length; j++)
       {
-        String info = parts[parts.length-2];
-        System.out.println(info+"    "+fmt);
+        int nb = getByteSize(fmts[j],1,nc);
+        str = new byte[nb];
+        is.read(str);
         
-        String format = parts[parts.length-1];
-        
-        int nc  = 3;
-        System.out.println("ALT:"+alt+" REF:"+ref);
-        if(alt.equals("."))
-          nc = 1;
-
-        String fmts[] = format.split(":");
-        for(int j=0; j<fmts.length; j++)
-        {
-          int nb = getByteSize(fmts[j],1,nc);
-          str = new byte[nb];
-          is.read(str);
-          
-          if(fmts[j].equals("GT"))
-            System.out.println("nbytes = "+nb+" GT:"+getGTString(str[0]));
-          else if(fmts[j].equals("PL"))
-            System.out.println("nbytes = "+nb+" PL:"+getPLString(str, nc));
-          else if(fmts[j].equals("DP")||fmts[j].equals("SP")||fmts[j].equals("GQ"))
-          {
-            System.out.println("nbytes = "+nb+" "+fmts[j]+":"+byteToInt(str[0]));
-          } 
-
-        }
+        final String value;
+        if(fmts[j].equals("GT"))
+          value = getGTString(str[0]);
+        else if(fmts[j].equals("PL"))
+          value = getPLString(str, nc);
+        else if(fmts[j].equals("DP")||fmts[j].equals("SP")||fmts[j].equals("GQ"))
+          value = Integer.toString(byteToInt(str[0]));
+        else
+          value = "";
+        bcfRecord.data.put(fmts[j], value);
       }
+      
     }
+    else
+      bcfRecord.info = parts[parts.length-1];
+    
+    return bcfRecord;
   }
   
   /**
@@ -161,37 +215,34 @@ class BCFReader
     return (int)(b & 0xFF);
   }
   
-  public static List<Index> load(File bcfIndex) throws IOException
+  protected static List<BCFIndex> loadIndex(File bcfIndex) throws IOException
   {
     FileInputStream fis = new FileInputStream(bcfIndex);
     BlockCompressedInputStream is = new BlockCompressedInputStream(fis);
     byte[] magic = new byte[4];
     is.read(magic);
-    System.out.println(new String(magic));
     
-    int n = readInt(is);
+    if(!new String(magic).equals("BCI\4"))
+      System.err.println("Not a BCF index file:: "+new String(magic));
     
-    List<Index> idx = new Vector<Index>(n);
+    int n = readInt(is);
+    List<BCFIndex> idx = new Vector<BCFIndex>(n);
     
     for(int i=0; i<n; i++)
     {
-      Index idx2 = new Index();
+      BCFIndex idx2 = new BCFIndex();
       idx2.n = readInt(is);
       idx2.index2_offset = new long[idx2.n];
       
       for(int j=0; j<idx2.n; j++)
-      {
         idx2.index2_offset[j] = readLong(is);
-      }
-      
-      if(is.read() == -1)
-        System.out.println("EOF");
+
       idx.add(idx2);
     }
     return idx;
   }
   
-  public long queryIndex(List<Index> idx, int tid, int beg)
+  protected long queryIndex(List<BCFIndex> idx, int tid, int beg)
   {
     long min_off = -1;
     if (beg < 0) 
@@ -206,20 +257,20 @@ class BCFReader
     return min_off;
   }
   
-  public static int readInt(final InputStream is) throws IOException {
+  protected static int readInt(final InputStream is) throws IOException {
     byte[] buf = new byte[4];
     is.read(buf);
     return ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).getInt();
   }
   
   
-  public static float readFloat(final InputStream is) throws IOException {
+  protected static float readFloat(final InputStream is) throws IOException {
     byte[] buf = new byte[4];
     is.read(buf);
     return ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).getFloat();
   }
 
-  public static long readLong(final InputStream is) throws IOException {
+  protected static long readLong(final InputStream is) throws IOException {
     byte[] buf = new byte[8];
     is.read(buf);
     return ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).getLong();
@@ -229,25 +280,25 @@ class BCFReader
   {
     try
     {
-      List<Index> idx = load(new File(args[0]));
+      List<BCFIndex> idx = loadIndex(new File(args[0]));
       
-      int sbeg;
-      int send;
-      if(args.length < 3)
-      {
-        sbeg = 326758;
-        send = sbeg+1;
-      }
-      else
+      int sbeg = 0;
+      int send = Integer.MAX_VALUE;
+      if(args.length > 2)
       {
         sbeg = Integer.parseInt(args[2]);
         send = Integer.parseInt(args[3]);
       }
       
       BCFReader reader = new BCFReader();
-      long off = reader.queryIndex(idx, 0, sbeg);
-      System.out.println(off);
-      reader.query(new File(args[1]), off, sbeg, send);
+      int bid = 0;
+      long off = reader.queryIndex(idx, bid, sbeg);
+      reader.query(new File(args[1]), off);
+      
+      VCFRecord bcfRecord;
+      while( (bcfRecord = reader.next(bid, sbeg, send)) != null )
+        System.out.println(bcfRecord.toString());
+      
     }
     catch (IOException e)
     {
@@ -257,16 +308,8 @@ class BCFReader
   }
 }
 
-class BCFRecord
-{
-  int seqID;
-  int pos;
-  float quality;
-  String info;
-  String format;
-}
 
-class Index
+class BCFIndex
 {
   int n;
   long index2_offset[];
-- 
GitLab