diff --git a/uk/ac/sanger/artemis/components/variant/IOUtils.java b/uk/ac/sanger/artemis/components/variant/IOUtils.java
index 407dab698908b897716c3883a461a71ebe8c5b28..80ec4dff8547a9ccee3087a6293c9b9bfc88332e 100644
--- a/uk/ac/sanger/artemis/components/variant/IOUtils.java
+++ b/uk/ac/sanger/artemis/components/variant/IOUtils.java
@@ -23,13 +23,18 @@
  */
 package uk.ac.sanger.artemis.components.variant;
 
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintWriter;
 import java.io.Writer;
 import java.net.URL;
+import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
 import java.util.Vector;
@@ -167,6 +172,326 @@ class IOUtils
     new MessageDialog (null, "Saved Files", filterFiles, false);
   }
 
+  /**
+   * Export all variant sites to a multiple fasta file. One FASTA record is
+   * written per sample: each sample is streamed to its own temporary file and
+   * the temporary files are then concatenated into the file chosen by the user.
+   * @param vcfView the view supplying the entry group and the VCF readers
+   */
+  protected static void exportVariantFasta(final VCFview vcfView)
+  {
+    final EntryGroup entryGroup = vcfView.getEntryGroup();
+    final int length = entryGroup.getSequenceLength();
+    final FeatureVector features = entryGroup.getAllFeatures();
+    final int nreaders = vcfView.getVcfReaders().length;
+
+    final String name = entryGroup.getActiveEntries().elementAt(0).getName();
+    final File newfile = new File(
+        getBaseDirectoryFromEntry(entryGroup.getActiveEntries().elementAt(0)),
+        name);
+
+    int ntotalSamples = 0;
+    for(int i = 0; i < nreaders; i++)
+      ntotalSamples += vcfView.getVcfReaders()[i].getNumberOfSamples();
+
+    // nothing to export; this also avoids dividing by zero further down
+    if(ntotalSamples == 0)
+      return;
+
+    final Writer[] writer = new Writer[ntotalSamples];
+    final File[] tmpFiles = new File[ntotalSamples];
+    PrintWriter pw = null;
+    try
+    {
+      final File f = getFile(newfile.getAbsolutePath(), 1, ".fasta", null);
+      if(f == null)
+        return;
+
+      // One temporary file per sample, indexed by a running sample number so
+      // that it matches the per-sample columns built in writeVariants().
+      // Assumes sampleNames.length agrees with getNumberOfSamples().
+      int sample = 0;
+      for(int i = 0; i < nreaders; i++)
+      {
+        final String names[] = vcfView.getVcfReaders()[i].sampleNames;
+        for(int j = 0; j < names.length; j++)
+        {
+          // createTempFile() rejects prefixes shorter than three characters,
+          // so short sample names need the fixed "vcf" lead-in
+          final String prefix = "vcf"+names[j].replaceAll("[/\\\\:]", "_");
+          tmpFiles[sample] = File.createTempFile(prefix, "art");
+          writer[sample] = new FileWriter(tmpFiles[sample]);
+          writer[sample].write(">"+names[j]);
+          sample++;
+        }
+      }
+
+      // at least one line per chunk, so the loop below always advances
+      final int MAX_BASE_CHUNK =
+          Math.max(1, 10000/ntotalSamples)*SEQUENCE_LINE_BASE_COUNT;
+
+      // one table of records per VCF reader, refilled for every chunk
+      final Hashtable<Integer, VCFRecord> records[] = new Hashtable[nreaders];
+      int baseCount = 0;
+
+      // write variant sites to temp files
+      for(int i = 0; i < length; i += MAX_BASE_CHUNK)
+      {
+        final int start = i+1;
+        final int end = Math.min(i+MAX_BASE_CHUNK, length);
+
+        storeVCFRecords(vcfView, records, start, end);
+        baseCount = writeVariants(vcfView, records, writer, features,
+                                  ntotalSamples, start, end, baseCount);
+      }
+
+      for(int i = 0; i < ntotalSamples; i++)
+        writer[i].close();
+
+      // concatenate the single fasta files into a multiple fasta file
+      pw = new PrintWriter(new FileOutputStream(f));
+      for(int i = 0; i < tmpFiles.length; i++)
+      {
+        final BufferedReader br = new BufferedReader(new FileReader(tmpFiles[i].getPath()));
+        try
+        {
+          String line;
+          while((line = br.readLine()) != null)
+            pw.println(line);
+        }
+        finally
+        {
+          br.close();
+        }
+      }
+    }
+    catch(IOException e)
+    {
+      e.printStackTrace();
+    }
+    finally
+    {
+      // close whatever is still open and remove the temporary files, also
+      // when the export failed part of the way through
+      if(pw != null)
+        pw.close();
+      for(int i = 0; i < ntotalSamples; i++)
+      {
+        try
+        {
+          if(writer[i] != null)
+            writer[i].close();
+        }
+        catch(IOException e)
+        {
+          e.printStackTrace();
+        }
+        if(tmpFiles[i] != null)
+          tmpFiles[i].delete();
+      }
+    }
+  }
+
+  /**
+   * For a given range store the VCFRecords of each VCF reader in a Hashtable
+   * keyed on base position.
+   * @param vcfView the view supplying the VCF readers and the contig offsets
+   * @param records one slot per VCF reader; each slot is given a new table
+   * @param start   first base of the range
+   * @param end     last base of the range
+   * @throws IOException
+   */
+  private static void storeVCFRecords(final VCFview vcfView,
+                                      final Hashtable<Integer, VCFRecord> records[],
+                                      final int start,
+                                      final int end) throws IOException
+  {
+    for(int i = 0; i < vcfView.getVcfReaders().length; i++)
+    {
+      final AbstractVCFReader reader = vcfView.getVcfReaders()[i];
+      records[i] = new Hashtable<Integer, VCFRecord>();
+
+      if(!vcfView.isConcatenate())
+      {
+        loadRecords(records[i], reader, vcfView.getChr(), start, end, 0);
+        continue;
+      }
+
+      // NOTE: the end of a contig is taken from the offset of the next entry,
+      // which presumes getSeqNames() lists the contigs in concatenation order
+      final String[] contigs = reader.getSeqNames();
+      for(int j = 0; j < contigs.length; j++)
+      {
+        final int offset = vcfView.getSequenceOffset(contigs[j]);
+        final int nextOffset;
+        if(j < contigs.length-1)
+          nextOffset = vcfView.getSequenceOffset(contigs[j+1]);
+        else
+          nextOffset = vcfView.seqLength;
+
+        // skip contigs that lie outside the range; testing for overlap (and
+        // not just whether start or end fall in the contig) also picks up a
+        // contig that sits wholly inside the range
+        if(start > nextOffset || end <= offset)
+          continue;
+
+        final int thisStart = Math.max(1, start - offset);
+        loadRecords(records[i], reader, contigs[j], thisStart, end - offset, offset);
+      }
+    }
+  }
+
+  /**
+   * Store every record the reader returns for the contig range, keyed on the
+   * record position plus the given offset.
+   * @param records table the records are added to, must not be null
+   * @param reader  VCF reader to take the records from
+   * @param contig  sequence name passed to the reader
+   * @param start   range start passed to the reader
+   * @param end     range end passed to the reader
+   * @param offset  value added to each record position to form its key
+   * @throws IOException
+   */
+  private static void loadRecords(Hashtable<Integer, VCFRecord> records,
+                                  final AbstractVCFReader reader,
+                                  final String contig,
+                                  final int start,
+                                  final int end,
+                                  final int offset) throws IOException
+  {
+    VCFRecord record;
+    while((record = reader.getNextRecord(contig, start, end)) != null)
+      records.put(record.getPos()+offset, record);
+  }
+
+  /**
+   * Write the variant sites of a range to the per-sample writers. A site is
+   * written only when at least one sample shows a SNP or multiple allele
+   * there; every sample then gets an entry for that site.
+   * @param vcfView       the view supplying the VCF readers and filters
+   * @param records       records of this range, one table per VCF reader
+   * @param writer        one writer per sample
+   * @param features      features passed on to VCFview.showVariant()
+   * @param ntotalSamples number of samples summed over all VCF readers
+   * @param start         first base of the range
+   * @param end           last base of the range
+   * @param bc            count of bases written per sample so far
+   * @return the base count after this range
+   * @throws IOException
+   */
+  private static int writeVariants(final VCFview vcfView,
+                                   final Hashtable<Integer, VCFRecord> records[],
+                                   final Writer writer[],
+                                   final FeatureVector features,
+                                   final int ntotalSamples,
+                                   final int start,
+                                   final int end,
+                                   int bc) throws IOException
+  {
+    final String newline = System.getProperty("line.separator");
+
+    // end is inclusive: the caller starts the next range at end+1
+    for(int i = start; i <= end; i++)
+    {
+      int thisSample = 0;
+      final String[] thisBase = new String[ntotalSamples];
+      boolean seenSNP = false;
+      int insertionLength = 0;
+
+      // loop over each VCF file
+      for(int j = 0; j < vcfView.getVcfReaders().length; j++)
+      {
+        final AbstractVCFReader reader = vcfView.getVcfReaders()[j];
+        final int nsamples = reader.getNumberOfSamples();
+        final VCFRecord record = records[j].get(i);
+
+        if(record == null)
+        {
+          // still step over this reader's samples, otherwise the calls of
+          // the following readers end up under the wrong sample
+          thisSample += nsamples;
+          continue;
+        }
+
+        final boolean vcf_v4 = reader.isVcf_v4();
+        // loop over each sample
+        for(int k = 0; k < nsamples; k++)
+        {
+          // look at each type of variant
+          if(vcfView.showVariant(record, features, i, reader, k, j))
+          {
+            if(record.getAlt().isDeletion(vcf_v4))
+            {
+              thisBase[thisSample] = "-";
+            }
+            else if(record.getAlt().isInsertion(vcf_v4))
+            {
+              String in = record.getAlt().toString();
+              if(in.startsWith("I"))
+                in = in.substring(1);
+              thisBase[thisSample] = in;
+              if(in.length() > insertionLength)
+                insertionLength = in.length();
+            }
+            else if(record.getAlt().isMultiAllele(k))
+            {
+              final String base = MultipleAlleleVariant.getIUBCode(record);
+              if(base != null)
+              {
+                thisBase[thisSample] = base;
+                seenSNP = true;
+              }
+            }
+            else if(record.getAlt().isNonVariant())
+              thisBase[thisSample] = ".";
+            else
+            {
+              thisBase[thisSample] = record.getAlt().toString();
+              seenSNP = true;
+            }
+          }
+          else
+            thisBase[thisSample] = "N"; // filtered out
+
+          thisSample++;
+        }
+      }
+
+      if(!seenSNP)
+        continue;
+
+      for(int j = 0; j < thisBase.length; j++)
+      {
+        // a sample without a call at this site is written as N
+        final String base = (thisBase[j] != null) ? thisBase[j] : "N";
+        int column = bc;
+
+        for(int k = 0; k < base.length(); k++, column++)
+        {
+          if(column%SEQUENCE_LINE_BASE_COUNT == 0)
+            writer[j].write(newline);
+          writer[j].write(base.charAt(k));
+        }
+
+        // pad with gaps up to the longest insertion seen at this site
+        for(int k = base.length(); k < insertionLength; k++, column++)
+        {
+          if(column%SEQUENCE_LINE_BASE_COUNT == 0)
+            writer[j].write(newline);
+          writer[j].write("-");
+        }
+      }
+
+      if(insertionLength > 0)
+        bc += insertionLength;
+      else
+        bc++;
+    }
+
+    return bc;
+  }
+
+
   /**
    * Write out FASTA for a selected base range
    * @param vcfView
@@ -406,7 +731,8 @@ class IOUtils
     header.append(reader.getName()).append(" ");
     header.append(seqName).append(" ");
     header.append(sbeg).append(":").append(send);
-    header.append((marker.isForwardMarker() ? "" : " reverse"));
+    if(marker != null)
+      header.append((marker.isForwardMarker() ? "" : " reverse"));
     return header;
   }
 
diff --git a/uk/ac/sanger/artemis/components/variant/VCFview.java b/uk/ac/sanger/artemis/components/variant/VCFview.java
index 00a1f2bbe22a9c982194a98ff370d081f60e3f9f..6fb8b821c7dce39c1c7af354bec766aa8ba3ac05 100644
--- a/uk/ac/sanger/artemis/components/variant/VCFview.java
+++ b/uk/ac/sanger/artemis/components/variant/VCFview.java
@@ -466,6 +466,7 @@ public class VCFview extends JPanel
       }
     });
     export.add(exportVCF);
+
     export.add(new JSeparator());
 
     final JMenuItem exportFastaSelected = new JMenuItem("FASTA of selected feature(s) ...");
@@ -504,6 +505,26 @@ public class VCFview extends JPanel
     });
     export.add(exportFasta);
 
+    final JMenuItem viewMinimalFasta = new JMenuItem("FASTA of variant sites only ...");
+    viewMinimalFasta.addActionListener(new ActionListener(){
+      public void actionPerformed(ActionEvent e)
+      {
+        Container f = getVcfContainer();
+        try
+        {
+          f.setCursor(new Cursor(Cursor.WAIT_CURSOR));
+          IOUtils.exportVariantFasta(VCFview.this);
+        }
+        finally
+        {
+          f.setCursor(new Cursor(Cursor.DEFAULT_CURSOR));
+        }
+      }
+    });
+    export.addSeparator();
+    export.add(viewMinimalFasta);
+
+
     final JMenu view = new JMenu("View");
     popup.add(view);
     final JMenuItem viewFastaSelected = new JMenuItem("FASTA of selected feature(s) ...");
@@ -541,7 +562,8 @@ public class VCFview extends JPanel
       }
     });
     view.add(viewFasta);
-    
+
+
     JMenu graph = new JMenu("Graph");
     popup.add(graph);
 