From d99d0678b433be01c67386c7cccf8e7d56a517e6 Mon Sep 17 00:00:00 2001 From: tcarver <tjc> Date: Wed, 8 Feb 2012 16:33:39 +0000 Subject: [PATCH] changes to writing variant sites --- .../artemis/components/variant/IOUtils.java | 67 +++++++++++++++---- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/uk/ac/sanger/artemis/components/variant/IOUtils.java b/uk/ac/sanger/artemis/components/variant/IOUtils.java index 0a2765e03..ec1d7a8af 100644 --- a/uk/ac/sanger/artemis/components/variant/IOUtils.java +++ b/uk/ac/sanger/artemis/components/variant/IOUtils.java @@ -179,10 +179,6 @@ class IOUtils protected static void exportVariantFasta(final VCFview vcfView) { final EntryGroup entryGroup = vcfView.getEntryGroup(); - int length = entryGroup.getSequenceLength(); - final Bases bases = entryGroup.getBases(); - final FeatureVector features = entryGroup.getAllFeatures(); - final String name = entryGroup.getActiveEntries().elementAt(0).getName(); final File newfile = new File( getBaseDirectoryFromEntry(entryGroup.getActiveEntries().elementAt(0)), @@ -194,6 +190,30 @@ class IOUtils if(f == null) return; + exportVariantFasta(vcfView, + new PrintWriter(new FileOutputStream(f)), + entryGroup.getSequenceLength(), + entryGroup.getAllFeatures(), + entryGroup.getBases()); + } + catch(IOException ioe) + { + ioe.printStackTrace(); + } + } + + /** + * Export all variant sites to a multiple fasta file. + * @param vcfView + */ + protected static void exportVariantFasta(final VCFview vcfView, + final PrintWriter pw, + final int length, + final FeatureVector features, + final Bases bases) + { + try + { int ntotalSamples = 0; for (int i = 0; i < vcfView.getVcfReaders().length; i++) ntotalSamples += vcfView.getVcfReaders()[i].getNumberOfSamples(); @@ -206,16 +226,18 @@ class IOUtils final String names[] = vcfView.getVcfReaders()[i].sampleNames; for(int j=0; j<names.length; j++) { - tmpFiles[i+j] = File.createTempFile(names[j].replaceAll("[/\\:]", "_"), "art"); + final String fn = (names[j].equals("") ? (j+1)+"_sample" : names[j].replaceAll("[/\\:]", "_")); + tmpFiles[i+j] = File.createTempFile(fn, "art"); writer[i+j] = new FileWriter( tmpFiles[i+j] ); - writer[i+j].write(">"+names[j]); + writer[i+j].write(">"+fn); } } // include reference bases + final String refName = vcfView.getEntryGroup().getActiveEntries().elementAt(0).getName(); tmpFiles[ntotalSamples] = File.createTempFile("ref", "art"); writer[ntotalSamples] = new FileWriter( tmpFiles[ntotalSamples] ); - writer[ntotalSamples].write(">reference"); + writer[ntotalSamples].write(">"+refName); final int MAX_BASE_CHUNK = (10000/ntotalSamples)*SEQUENCE_LINE_BASE_COUNT; @@ -223,7 +245,6 @@ class IOUtils int baseCount = 0; // write variant sites to tmp files - //length = 2172095; for(int i=0; i<length; i+=MAX_BASE_CHUNK) { int start = i+1; @@ -240,7 +261,6 @@ class IOUtils writer[i].close(); // concatenate the single fasta files into a multiple fasta file - final PrintWriter pw = new PrintWriter(new FileOutputStream(f)); for (int i = 0; i < tmpFiles.length; i++) { final BufferedReader br = new BufferedReader(new FileReader(tmpFiles[i].getPath())); @@ -367,8 +387,7 @@ class IOUtils if(record == null) continue; - - + boolean vcf_v4 = reader.isVcf_v4(); int nsamples = reader.getNumberOfSamples(); // loop over each sample @@ -379,7 +398,22 @@ class IOUtils { if(record.getAlt().isDeletion(vcf_v4)) { + // note: do not write out if just deletion thisBase[thisSample] = "-"; + + /*if( thisBase[ntotalSamples] == null || + thisBase[ntotalSamples].length() < record.getRef().length() ) + { + thisBase[ntotalSamples] = record.getRef(); + if(!record.getAlt().toString().equals(".")) + thisBase[thisSample] = record.getAlt().toString(); + else + thisBase[thisSample] = ""; + + int padLength = thisBase[ntotalSamples].length() - thisBase[thisSample].length(); + for(int ipad=0; ipad<padLength; ipad++) + thisBase[thisSample] += "-"; + }*/ } else if(record.getAlt().isInsertion(vcf_v4)) { @@ -389,6 +423,12 @@ class IOUtils thisBase[thisSample] = in; if(in.length() > insertionLength) insertionLength = in.length(); + seenSNP = true; + + if( (thisBase[ntotalSamples] == null || + thisBase[ntotalSamples].length() < record.getRef().length()) && + in.toLowerCase().startsWith(record.getRef().toLowerCase())) + thisBase[ntotalSamples] = record.getRef(); } else if(record.getAlt().isMultiAllele(k)) { @@ -400,11 +440,12 @@ class IOUtils } } else if(record.getAlt().isNonVariant()) + { thisBase[thisSample] = "."; + } else { - if(record.getAlt().toString().length() > 1 && - record.getAlt().toString().length() == record.getRef().length() ) + if(record.getAlt().toString().length() == record.getRef().length() ) { thisBase[thisSample] = record.getAlt().toString(); seenSNP = true; -- GitLab