diff --git a/uk/ac/sanger/artemis/components/variant/BCFReader.java b/uk/ac/sanger/artemis/components/variant/BCFReader.java index 4e49e9314224c3108f5e1b5b5fe963c37d60406e..b746e9a5e789f75c9cea283f49d4592f6ef50900 100644 --- a/uk/ac/sanger/artemis/components/variant/BCFReader.java +++ b/uk/ac/sanger/artemis/components/variant/BCFReader.java @@ -194,10 +194,10 @@ class BCFReader extends AbstractVCFReader if(formatPattern.matcher(bcfRecord.getFormat()).matches()) { - int n_alleles = bcfRecord.getNumAlleles(); + int n_alleles = bcfRecord.getAlt().getNumAlleles(); int nc = (int) (n_alleles * ((float)(((float)n_alleles+1.f)/2.f))); - if(bcfRecord.getAlt().equals(".")) + if(bcfRecord.getAlt().isNonVariant()) nc = 1; String fmts[] = bcfRecord.getFormat().split(":"); diff --git a/uk/ac/sanger/artemis/components/variant/IOUtils.java b/uk/ac/sanger/artemis/components/variant/IOUtils.java index c48a00a19e335d10a7de827582d82dbca9899add..f18f9b83f721f9eb3a71fea9ea507b51b575b458 100644 --- a/uk/ac/sanger/artemis/components/variant/IOUtils.java +++ b/uk/ac/sanger/artemis/components/variant/IOUtils.java @@ -29,8 +29,6 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.net.URL; -import java.util.Enumeration; -import java.util.Hashtable; import java.util.List; import javax.swing.JFileChooser; @@ -46,7 +44,6 @@ import uk.ac.sanger.artemis.FeatureVector; import uk.ac.sanger.artemis.components.MessageDialog; import uk.ac.sanger.artemis.components.StickyFileChooser; import uk.ac.sanger.artemis.components.variant.BCFReader.BCFReaderIterator; -import uk.ac.sanger.artemis.io.FastaStreamSequence; import uk.ac.sanger.artemis.io.Key; import net.sf.samtools.util.BlockCompressedInputStream; @@ -67,7 +64,7 @@ class IOUtils { try { - File filterFile = getFile(vcfFileName, nfiles); + File filterFile = getFile(vcfFileName, nfiles, "filter"); FileWriter writer = new FileWriter(filterFile); if(IOUtils.isBCF(vcfFileName)) { @@ -101,13 +98,13 @@ class IOUtils } } - private static File getFile(final String vcfFileName, final int nfiles) throws IOException + private static File getFile(final String vcfFileName, final int nfiles, final String suffix) throws IOException { if(nfiles > 1) - return new File(vcfFileName+".filter"); + return new File(vcfFileName+suffix); final StickyFileChooser file_dialog = new StickyFileChooser(); - file_dialog.setSelectedFile(new File(vcfFileName+".filter")); + file_dialog.setSelectedFile(new File(vcfFileName+suffix)); file_dialog.setDialogTitle("Choose save file ..."); file_dialog.setDialogType(JFileChooser.SAVE_DIALOG); final int status = file_dialog.showSaveDialog(null); @@ -149,54 +146,60 @@ class IOUtils final VCFview vcfView, final boolean vcf_v4) { - // get all CDS features that do not have the /pseudo qualifier + // get all CDS features that do not have the /pseudo qualifier final FeatureVector features = getFeatures( new FeatureKeyQualifierPredicate(Key.CDS, "pseudo", false), entryGroup); + exportFasta(entryGroup, vcfReaders, chr, vcfView, vcf_v4, features); + } + + + protected static void exportFasta(final EntryGroup entryGroup, + final AbstractVCFReader vcfReaders[], + final String chr, + final VCFview vcfView, + final boolean vcf_v4, + final FeatureVector features) + { + String suffix = ".fasta"; + if(features.size() == 1) + suffix = "."+features.elementAt(0).getIDString()+suffix; for (int i = 0; i < vcfReaders.length; i++) { String vcfFileName = vcfReaders[i].getFileName(); try { - File filterFile = getFile(vcfFileName, vcfReaders.length); + File filterFile = getFile(vcfFileName, vcfReaders.length, suffix); FileWriter writer = new FileWriter(filterFile); for (int j = 0; j < features.size(); j++) { Feature f = features.elementAt(j); FeatureSegmentVector segs = f.getSegments(); - Hashtable<String, String> seqs = new Hashtable<String, String>(); - String bases = f.getBases(); - seqs.put("ref", bases); - StringBuffer buff = new StringBuffer(); - for(int k=0; k<segs.size(); k++) { FeatureSegment seg = segs.elementAt(k); int sbeg = seg.getRawRange().getStart(); int send = seg.getRawRange().getEnd(); String segBases = seg.getBases(); - + if (vcfReaders[i] instanceof BCFReader) { BCFReaderIterator it = ((BCFReader) vcfReaders[i]).query(chr, sbeg, send); - VCFRecord bcfRecord = null; - + VCFRecord bcfRecord; while ((bcfRecord = it.next()) != null) - segBases = getSeqsVariations(bcfRecord, segBases, sbeg, f.isForwardFeature(), vcf_v4); + segBases = getSeqsVariation(bcfRecord, segBases, sbeg, f.isForwardFeature(), vcf_v4); } - buff.append(segBases); } - - Enumeration<String> en = seqs.keys(); - while(en.hasMoreElements()) - { - String key = en.nextElement(); - bases = seqs.get(key); - FastaStreamSequence stream = new FastaStreamSequence(bases, f.getIDString()+":"+key); - stream.writeToStream(writer); - } + + StringBuffer header = new StringBuffer(f.getSystematicName()); + header.append(" "+f.getIDString()+" "); + final String product = f.getProductString(); + header.append( (product == null ? "undefined product" : product) ); + header.append(" ").append(f.getWriteRange()); + + writeSequence(writer, header.toString(), buff.toString()); } writer.close(); @@ -208,24 +211,23 @@ class IOUtils } } - private static int getNumberOfDeletions(VCFRecord vcfRecord, boolean vcf_v4) + private static void writeSequence(FileWriter writer, String header, String bases) throws IOException { - if(vcf_v4) - return vcfRecord.getRef().length()-vcfRecord.getAlt().length(); - - int index = vcfRecord.getAlt().indexOf("D"); - int ndel = 0; - try + writer.write (">" + header + "\n"); + + final int SEQUENCE_LINE_BASE_COUNT = 60; + for(int k=0; k<bases.length(); k+=SEQUENCE_LINE_BASE_COUNT) { - ndel = Integer.parseInt( vcfRecord.getAlt().substring(index+1) ); + int end = k + SEQUENCE_LINE_BASE_COUNT; + if(end > bases.length()) + end = bases.length(); + writer.write ( bases.substring(k,end) + "\n"); } - catch(NumberFormatException e) { e.printStackTrace(); } - return ndel; } - protected static String getSeqsVariations(VCFRecord vcfRecord, + private static String getSeqsVariation(VCFRecord vcfRecord, String bases, int sbeg, boolean isFwd, boolean vcf_v4) - { + { int position = vcfRecord.getPos()-sbeg; if(!isFwd) position = bases.length()-position; @@ -233,24 +235,50 @@ class IOUtils if(position > bases.length()) return bases; - if(vcfRecord.isDeletion(vcf_v4)) + StringBuffer buff = new StringBuffer(); + if(isFwd) + buff.append(bases.substring(0,position)); + else if(position > 0) + buff.append(bases.substring(0,position-1)); + + if(vcfRecord.getAlt().isDeletion(vcf_v4)) + { + int ndel = vcfRecord.getAlt().getNumberOfDeletions(vcf_v4); + for(int i=0; i<ndel; i++) + buff.append("-"); + position+=ndel; + } + else if(vcfRecord.getAlt().isInsertion(vcf_v4)) { - int ndel = getNumberOfDeletions(vcfRecord, vcf_v4); - if(isFwd) - return bases.substring(0,position)+bases.substring(position+ndel); - else - return bases.substring(0,position-ndel)+bases.substring(position); } - else if(vcfRecord.isInsertion(vcf_v4)) + else if(vcfRecord.getAlt().isMultiAllele()) { + } + else if(vcfRecord.getAlt().isNonVariant()) + { + String ref = vcfRecord.getRef(); + if(vcfRecord.getAlt().isNonVariant()) + buff.append(ref.toUpperCase()); + else + buff.append(ref); } else { - + String alt = vcfRecord.getAlt().toString(); + if(vcfRecord.getAlt().isNonVariant()) + buff.append(alt.toUpperCase()); + else + buff.append(alt); } - return bases; + + if(isFwd && position < bases.length()) + buff.append(bases.substring(position+1)); + else + buff.append(bases.substring(position)); + + return buff.toString(); } /** diff --git a/uk/ac/sanger/artemis/components/variant/VCFRecord.java b/uk/ac/sanger/artemis/components/variant/VCFRecord.java index 8427bb525000e9ab1092bdeb51d736497ed3aa0b..fcee2c9c48fb7d29d45629344a1d32f4fe354aee 100644 --- a/uk/ac/sanger/artemis/components/variant/VCFRecord.java +++ b/uk/ac/sanger/artemis/components/variant/VCFRecord.java @@ -23,6 +23,8 @@ package uk.ac.sanger.artemis.components.variant; +import java.util.regex.Pattern; + import uk.ac.sanger.artemis.Feature; import uk.ac.sanger.artemis.FeatureVector; import uk.ac.sanger.artemis.io.Range; @@ -36,13 +38,14 @@ class VCFRecord private int pos; private String ID; private String ref; - private String alt; + private VariantBase var; private float quality; private String filter; private String info; private String format; private String data[][]; private short synFlag = -1; + protected static Pattern MULTI_ALLELE_PATTERN = Pattern.compile("^[AGCT]+,[AGCT,]+$"); /** @@ -51,17 +54,10 @@ class VCFRecord */ public String toString() { - return chrom+"\t"+pos+"\t"+ID+"\t"+ref+"\t"+alt+"\t"+quality+ + return chrom+"\t"+pos+"\t"+ID+"\t"+ref+"\t"+var.toString()+"\t"+quality+ "\t"+filter+"\t"+info+"\t"+format+"\t"+getSampleDataString(); } - - protected int getNumAlleles() - { - if (alt.equals(".")) - return 1; - - return alt.split(",").length+1; - } + /** * Parse a VCF line and return a VCFRecord @@ -77,7 +73,7 @@ class VCFRecord rec.pos = Integer.parseInt(parts[1]); rec.ID = parts[2]; rec.ref = parts[3]; - rec.alt = parts[4]; + rec.var = rec.new VariantBase(parts[4]); try { @@ -211,9 +207,9 @@ class VCFRecord /** * @return the alt */ - protected String getAlt() + protected VariantBase getAlt() { - return alt; + return var; } /** @@ -221,7 +217,7 @@ class VCFRecord */ protected void setAlt(String alt) { - this.alt = alt; + this.var = new VariantBase(alt); } /** @@ -304,39 +300,7 @@ class VCFRecord this.data = data; } - /** - * Is this a deletion type. - * @param variant - * @return - */ - protected boolean isDeletion(boolean vcf_v4) - { - if(vcf_v4) - { - if( alt.length() < ref.length() && !(alt.indexOf(",") > -1) ) - return true; - } - else if(alt.indexOf("D")>-1) - return true; - return false; - } - - /** - * Is this an insertion type. - * @param variant - * @return - */ - protected boolean isInsertion(boolean vcf_v4) - { - if(vcf_v4) - { - if( alt.length() > ref.length() && !(alt.indexOf(",") > -1) ) - return true; - } - else if(alt.indexOf("I")>-1) - return true; - return false; - } + /** * @param features @@ -363,7 +327,7 @@ class VCFRecord */ private short isSynonymous(FeatureVector features, int basePosition) { - char variant = getAlt().toLowerCase().charAt(0); + char variant = getAlt().toString().toLowerCase().charAt(0); int intronlength = 0; Range lastRange = null; @@ -450,4 +414,95 @@ class VCFRecord return 3; } + public class VariantBase + { + private String alt; + public VariantBase(String alt) + { + this.alt = alt; + } + + public String toString() + { + return alt; + } + + protected int length() + { + return alt.length(); + } + + /** + * Is this a deletion type. + * @param variant + * @return + */ + protected boolean isDeletion(boolean vcf_v4) + { + if(vcf_v4) + { + if( alt.length() < ref.length() && !(alt.indexOf(",") > -1) ) + return true; + } + else if(alt.indexOf("D")>-1) + return true; + return false; + } + + /** + * Is this an insertion type. + * @param variant + * @return + */ + protected boolean isInsertion(boolean vcf_v4) + { + if(vcf_v4) + { + if( alt.length() > ref.length() && !(alt.indexOf(",") > -1) ) + return true; + } + else if(alt.indexOf("I")>-1) + return true; + return false; + } + + protected boolean isMultiAllele() + { + if(VCFRecord.MULTI_ALLELE_PATTERN.matcher(alt).matches()) + return true; + return false; + } + + protected int getNumAlleles() + { + if (alt.equals(".")) + return 1; + + return alt.split(",").length+1; + } + + protected int getNumberOfDeletions(boolean vcf_v4) + { + String alt = getAlt().toString(); + if(vcf_v4) + return getRef().length()-alt.length(); + + int index = alt.indexOf("D"); + int ndel = 0; + try + { + ndel = Integer.parseInt( alt.substring(index+1) ); + } + catch(NumberFormatException e) { e.printStackTrace(); } + return ndel; + } + + protected boolean isNonVariant() + { + if(alt.equals(".")) + return true; + return false; + } + } + } \ No newline at end of file diff --git a/uk/ac/sanger/artemis/components/variant/VCFview.java b/uk/ac/sanger/artemis/components/variant/VCFview.java index e4f18a0235469cc1d394590c3f8a42e5e7e299ff..f8b7414f2c7e0c1282928f4b51606a960497181d 100644 --- a/uk/ac/sanger/artemis/components/variant/VCFview.java +++ b/uk/ac/sanger/artemis/components/variant/VCFview.java @@ -52,7 +52,6 @@ import java.net.URL; import java.util.Hashtable; import java.util.List; import java.util.Vector; -import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.swing.ButtonGroup; @@ -160,8 +159,6 @@ public class VCFview extends JPanel Hashtable<String, Integer> offsetLengths = null; private boolean concatSequences = false; - - private Pattern multiAllelePattern = Pattern.compile("^[AGCT]+,[AGCT,]+$"); protected static Pattern tabPattern = Pattern.compile("\t"); public static String VCFFILE_SUFFIX = ".*\\.[bv]{1}cf(\\.gz)*$"; @@ -186,7 +183,7 @@ public class VCFview extends JPanel this.feature_display = feature_display; this.vcfPanel = vcfPanel; this.vcfFiles = vcfFiles; - + setBackground(Color.white); MultiLineToolTipUI.initialize(); setToolTipText(""); @@ -514,6 +511,18 @@ public class VCFview extends JPanel }); export.add(exportVCF); + final JMenuItem exportFastaSelected = new JMenuItem("Export FASTA of selected features"); + exportFastaSelected.addActionListener(new ActionListener(){ + public void actionPerformed(ActionEvent e) + { + VCFview.this.setCursor(new Cursor(Cursor.WAIT_CURSOR)); + IOUtils.exportFasta(entryGroup, vcfReaders, chr, VCFview.this, vcf_v4, + selection.getAllFeatures()); + VCFview.this.setCursor(new Cursor(Cursor.DEFAULT_CURSOR)); + } + }); + export.add(exportFastaSelected); + final JMenuItem exportFasta = new JMenuItem("Export FASTA"); exportFasta.addActionListener(new ActionListener(){ public void actionPerformed(ActionEvent e) @@ -919,10 +928,10 @@ public class VCFview extends JPanel protected boolean showVariant(VCFRecord record, FeatureVector features, int basePosition) { - if(!showDeletions && record.isDeletion(vcf_v4)) + if(!showDeletions && record.getAlt().isDeletion(vcf_v4)) return false; - if(!showInsertions && record.isInsertion(vcf_v4)) + if(!showInsertions && record.getAlt().isInsertion(vcf_v4)) return false; if(!VCFFilter.passFilter(record)) @@ -931,14 +940,14 @@ public class VCFview extends JPanel if(!showNonOverlappings && !isOverlappingFeature(features, basePosition)) return false; - if(!showNonVariants && record.getAlt().equals(".")) + if(!showNonVariants && record.getAlt().isNonVariant()) return false; short isSyn = -1; markAsNewStop = false; if(markNewStops.isSelected() && - !record.isDeletion(vcf_v4) && - !record.isInsertion(vcf_v4) && + !record.getAlt().isDeletion(vcf_v4) && + !record.getAlt().isInsertion(vcf_v4) && record.getAlt().length() == 1 && record.getRef().length() == 1) { @@ -948,8 +957,8 @@ public class VCFview extends JPanel } if( (!showSynonymous || !showNonSynonymous) && - !record.isDeletion(vcf_v4) && - !record.isInsertion(vcf_v4) && + !record.getAlt().isDeletion(vcf_v4) && + !record.getAlt().isInsertion(vcf_v4) && record.getAlt().length() == 1 && record.getRef().length() == 1) { @@ -961,7 +970,7 @@ public class VCFview extends JPanel return false; } - if(!showMultiAlleles && multiAllelePattern.matcher(record.getAlt()).matches()) + if(!showMultiAlleles && record.getAlt().isMultiAllele()) return false; return true; @@ -1000,16 +1009,15 @@ public class VCFview extends JPanel g.setColor(getQualityColour(record)); else { - if(record.isDeletion(vcf_v4)) + if(record.getAlt().isDeletion(vcf_v4)) g.setColor(Color.gray); - else if(record.isInsertion(vcf_v4)) + else if(record.getAlt().isInsertion(vcf_v4)) g.setColor(Color.yellow); else if(record.getAlt().length() == 1 && record.getRef().length() == 1) g.setColor(getColourForSNP(record, features, basePosition)); else { - Matcher m = multiAllelePattern.matcher(record.getAlt()); - if(m.matches()) + if(record.getAlt().isMultiAllele()) { g.setColor(Color.orange); g.fillArc(pos[0]-3, pos[1]-LINE_HEIGHT-3, 6, 6, 0, 360); @@ -1035,7 +1043,7 @@ public class VCFview extends JPanel private Color getColourForSNP(VCFRecord record, FeatureVector features, int basePosition) { if(colourScheme == VARIANT_COLOUR_SCHEME) - return getVariantColour(record.getAlt()); + return getVariantColour(record.getAlt().toString()); else if(colourScheme == SYN_COLOUR_SCHEME) // synonymous / non-synonymous { short synFlag = record.getSynFlag(features, basePosition); @@ -1044,7 +1052,7 @@ public class VCFview extends JPanel else if(synFlag == 0 || synFlag == 2) return Color.blue; else - return getVariantColour(record.getAlt()); + return getVariantColour(record.getAlt().toString()); } else // score return getQualityColour(record);