Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
package uk.ac.sanger.artemis.components.variant;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URL;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import javax.swing.JFrame;
import javax.swing.JPanel;
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import uk.ac.sanger.artemis.Entry;
import uk.ac.sanger.artemis.EntryGroup;
import uk.ac.sanger.artemis.FeatureVector;
import uk.ac.sanger.artemis.Feature;
import uk.ac.sanger.artemis.Options;
import uk.ac.sanger.artemis.Selection;
import uk.ac.sanger.artemis.SimpleEntryGroup;
import uk.ac.sanger.artemis.components.EntryFileDialog;
import uk.ac.sanger.artemis.components.variant.VCFview;
import uk.ac.sanger.artemis.io.EntryInformation;
import uk.ac.sanger.artemis.sequence.MarkerRange;
import uk.ac.sanger.artemis.sequence.Strand;
import uk.ac.sanger.artemis.util.Document;
import uk.ac.sanger.artemis.util.DocumentFactory;
/**
* Tests for writing out sequences based on VCF data.
*/
public class WriteVCFTest
{
private VCFview vcfView;
/**
* Write FASTA, from a range selection on the forward strand.
* Variation is a SNP.
* VCF line : test 120768 . C T
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 120768:120777\n");
buff.append("cttgtcaagg\n");
buff.append(">test.vcf.gz test.embl.gz 120768:120777\n");
buff.append("tttgtcaagg\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the reverse strand.
* Variation is a SNP.
* VCF line : test 120768 . C T
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 120768:120777 reverse\n");
buff.append("ccttgacaag\n");
buff.append(">test.vcf.gz test.embl.gz 120768:120777 reverse\n");
buff.append("ccttgacaaa\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the forward strand.
* Variation is a deletion.
* test 396838 . tt t
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 396835:396845\n");
buff.append("tttttaggtat\n");
buff.append(">test.vcf.gz test.embl.gz 396835:396845\n");
buff.append("tttt-aggtat\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the reverse strand.
* Variation is a deletion.
* test 396838 . tt t
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 396835:396845 reverse\n");
buff.append("atacctaaaaa\n");
buff.append(">test.vcf.gz test.embl.gz 396835:396845 reverse\n");
buff.append("ataccta-aaa\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the forward strand.
* Variation is an insertion.
* test 366787 . t tT
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 366785:366795\n");
buff.append("tttcgcttttt\n");
buff.append(">test.vcf.gz test.embl.gz 366785:366795\n");
buff.append("tttTcgcttttt\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the reverse strand.
* Variation is an insertion.
* test 366787 . t tT
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 366785:366795 reverse\n");
buff.append("aaaaagcgaaa\n");
buff.append(">test.vcf.gz test.embl.gz 366785:366795 reverse\n");
buff.append("aaaaagcgaaaa\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
/**
* Write FASTA, from a range selection on the forward strand.
* Variation is a multiple allele, (PL number indicates either G or T):
* test 361978 . G T 77.76842 . DP=11;AF1=0.95;CI95=0.5,1;DP4=1,0,10,0;MQ=58;PV4=1,9.6e-06,0.37,1 PL:DP:SP:GT:GQ 106,0,0:11:0:1/1:10
*/
@Test
StringBuffer buff = new StringBuffer("> test.embl.gz 361975:361985\n");
buff.append("actgaaaaatt\n");
buff.append(">test.vcf.gz test.embl.gz 361975:361985\n");
buff.append("actkaaaaatt\n");
assertEquals("Export FASTA range ", writer.toString(), buff.toString());
}
// TEST WRITING FEATURES
/**
* Write FASTA, from a feature selection on the forward strand.
*/
@Test
StringWriter writer = getFeatureWriter("SPN23F03630,SPN23F03800,SPN23F04290");
StringBuffer fastaBuff = new StringBuffer(">test.embl.gz\n");
StringBuffer basesBuff = new StringBuffer();
basesBuff.append("atgaagaaaactgtttataaaaaattgggtatttcaattattgcgagtactttattggct");
basesBuff.append("agccagttatcgacagtatctgctttgagtgttatttctagtacaggtgaagaatatgag");
basesBuff.append("gtaagtgagacactagaaaaaggtccagagtctaatgattcttcattatctgagatttca");
basesBuff.append("ccaacgtatggttcatactaccaaaagcaatcagaagtattatcggtaatgatgatttga");
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
basesBuff.append("atggcaagtatcacactcacaccaagcgaaaaggatattcaggcttttcttgaacactat");
basesBuff.append("caaaccagtctggctcctagcaagaatccctatatccgctactttttgaaactacctcaa");
basesBuff.append("gcaacggtttctatctatacttctggaaaaatcttgcttcagggtgaaggggctgaaaaa");
basesBuff.append("tacgccagtttctttggctatcaagctgtagagcaaaccagcggacaaaatcttccttta");
basesBuff.append("attgggacagatgaggtgggaaatggttcctactttggtgggcttgcagttgtggctgcc");
basesBuff.append("tttgtcacacctgaccagcacgactttttacgaaaactcggtgtgggggattctaagact");
basesBuff.append("ctgaccgaccaaaagatccgtcagattgctcctattctcaaggaaaaaattcagcaccag");
basesBuff.append("gcactccttctctcacccagcaagtacaacgaggtcatcggagaccgctacaatgctgtt");
basesBuff.append("tcggttaaggttgccctccataatcaggctatctatctcctccttcaaaaaggtgttcag");
basesBuff.append("cctgagaaaattgtgattgatgcctttaccagtgctaaaaattatgacaagtacttggca");
basesBuff.append("caagagaccaatcgtttcagcaatcctatcagcttagaagaaaaggctgagggcaaatac");
basesBuff.append("ttggctgtcgcagtttcttctgtcattgcgcgtgatctctttctggaaaatcttgaaaat");
basesBuff.append("ttgggacgagaactgggttatcagcttccaagtggagctggaacggcttctgacaaggtg");
basesBuff.append("gctagccagattttgcaagcctatggtatgcagggactcaacttctgcgccaaattgcac");
basesBuff.append("tttaaaaatactgaaaaagcgaaaaacgcttag");
//SPN23F04290
basesBuff.append("atgctttatgtgggcattgatatcgctaaaaataaacacgatgttacagccttgaatgtt");
basesBuff.append("ccaggaaaaactgttcttaaaccactcactttttcaaataataaagctggttttgaactc");
basesBuff.append("ttagatctgtctcttcgacagctcaaccaagactgtctcatcgctcttaaacttctttct");
basesBuff.append("gatcccaatcgtgaacaatttcaacacgataatcggcaagtagacctaaaaatactggct");
basesBuff.append("agacatattcatcgtctcaagaaaaaacagtctgattggaaagtacaatacactcgttgt");
basesBuff.append("cttgatatcatctttcctgagttggataaaatcgttggaaagcattcagaatatacctac");
basesBuff.append("caactcttgacgcgctaccctaatcctcagaaaaggattgaggcaggatttgataagctg");
basesBuff.append("atagaaattaagcgattgaccgcttctaaaattcaggatatcctctcagtcgcacctcgt");
basesBuff.append("tctatcgaaacaacatctcctgctcgtgaattcgaaatcatcgaaatcatcaaacattac");
basesBuff.append("aagaggctcattgacaaggcggaaacatgtgtcaatgacttgatggctgagttcaactca");
basesBuff.append("gtcatcacgacggttactgggattgggggtcgtttaggggcggtcattttagccgagatt");
basesBuff.append("cgaaatattcatgcctttgataatcctgctcaattacaagctttcgctggactggattct");
basesBuff.append("tctatttatcagtcaggtcagattgatttagctggaagaatgatcaaacggggttcccct");
basesBuff.append("catctgcggtgggcactcatacaagctgccaaagcatgcgctcgcttttcacctgctttt");
basesBuff.append("aaggcctatcttaagactaagttagaacaaggaaaacattacaatgtagccatcatccac");
basesBuff.append("cttgcaaaaaaacttatccgaaccctgttttatatcctaaaaaagagctgccatttgacg");
basesBuff.append("aacaaaaagtga");
IOUtils.wrapString(basesBuff.toString(), fastaBuff);
fastaBuff.append(">test.vcf.gz \n");
basesBuff.append("atgaagaaaactatttataaaaaattgggtatttcaattattgcgagtactttattggct");
basesBuff.append("agccagttatcgacagtatctgctttgagtgttatttctagtacaggtgaagaatatgag");
basesBuff.append("gtaagtgagacaytagaaaaaggtccagagtctaatrattcttcattatctgagatttca");
basesBuff.append("ccaacgtatggttcatactaccaaaagcaatcagaagtattatcggtaatgatgatttga");
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
basesBuff.append("atggcaagtatcacactcacaccaagcgaaaaggatattcaggcttttcttgaacactat");
basesBuff.append("caaaccagtctggctcctagcaagaatccctatatccgctactttttgaaactacctcaa");
basesBuff.append("gcaacggtttctatctatacttctggaaaaatcttgcttcagggtgaaggggctgaaaaa");
basesBuff.append("tacgccagtttctttggctatcaagctgtagagcaaaccagcggacaaaatcttccttta");
basesBuff.append("attgggacagatgaggtgggaaatggttcctactttggtgggcttgcagttgtggctgcc");
basesBuff.append("tttgtcacacctgaccagcacgactttttacgaaaactcggtgtgggggattctaagact");
basesBuff.append("ctgaccgaccaaaagatccgtcagattgctcctattctcaaggaaaaaatccagcaccag");
basesBuff.append("gcactccttctctcacccagcaagtacaacgaggtcatcggagaccgctacaatgctgtt");
basesBuff.append("tcggttaaggttgccctccataatcaggctatctatctcctccttcaaaaaggtgttcag");
basesBuff.append("cctgagaaaattgtgattgatgcctttaccagtgctaaaaattatgacaagtacttggca");
basesBuff.append("caagaggccaatcgtttcagcaatcctatcagcttagaaaaaaaggctgagggcaaatac");
basesBuff.append("ttggctgtcgcagtttcttctgtcattgcgcgtgatctctttctggaaaatcttgaaaat");
basesBuff.append("ctgggacgagaactgggttatcagctcccaagtggagctggaacagcttctgacaaggtg");
basesBuff.append("gctagccagattttgcaagcctatggtatgcagggactcagcttctgcgccaaattgcac");
basesBuff.append("tttaaaaacactgaaaaagcgaaaaaaacgcttag");
//SPN23F04290
basesBuff.append("atgctttatgtgggcattgatatcgctaaaaataaacacgatgttacagccttgaatgtt");
basesBuff.append("ccaggaaaaactgttcttaaaccactcactttttcaaataataaagctggttttgaactc");
basesBuff.append("ttagatctgtctcttcgacagctcaaccaagactgtctcatcgctcttaaacttctttct");
basesBuff.append("gaccccaatcgtgaacaatttcaacacgataatcggcaagtagaactaaaaatactggct");
basesBuff.append("agacatattcatcgtctcaagaaaaaacagtctgattggaaagtacaatacactcgttgt");
basesBuff.append("cttgatatcatctttcctgagttggataaaatcgttgaaaagcattcagaatatacctac");
basesBuff.append("caactcttgacgcgctaccctaatcctcagaaaaggcttgaggcaggatttgataagctg");
basesBuff.append("atagaaattaagcgattgaccgcttctaaaattcaggatatcctctcagttgcacctcgt");
basesBuff.append("tctatcgmaacaacatctcctgctcgtgaattcgaaatcatcgaaatcatcaaacattac");
basesBuff.append("aagaggctcattgacaaggcggaaacatgtgtcaatgacttgatggctgagttcaactcg");
basesBuff.append("gtcatcacgacggtcactgggattgggaatcgtttagaggcggtcattttagccgagatt");
basesBuff.append("cgaaatattcatgcctttgataatcctgctcaattacaagctttcgctggactggattct");
basesBuff.append("tctatttatcagtcaggtcagattgatttagctggaagaatgatcaaacggggttcccct");
basesBuff.append("catctgcggtgggcactcatacaagctgccaaagcatgccctcgcttttcacctgctttt");
basesBuff.append("aaggcctatcttaagactaagttagaataaggaaaacattacaatgtagccatcatccac");
basesBuff.append("cttgcaaaaaaacttatccgaaccctgttttatatcctaaaaaagagctgccatttgacg");
basesBuff.append("aacaaaaagtga");
IOUtils.wrapString(basesBuff.toString(), fastaBuff);
assertEquals("Export FASTA feature ", writer.toString(), fastaBuff.toString());
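/**
* Export the reference and VCF-derived sequences for a base range as FASTA.
* @param isFwd true to select the range on the forward strand, false for
*              the reverse strand
* @param start first base of the range; mirrored onto the reverse strand
*              when isFwd is false
* @param end   last base of the range; mirrored onto the reverse strand
*              when isFwd is false
* @return the writer that the FASTA export was written to
*/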
private StringWriter getRegionWriter(boolean isFwd, int start, int end)
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
{
StringWriter writer = new StringWriter();
Selection selection = new Selection(null);
FeatureVector features = vcfView.getEntryGroup().getAllFeatures();
Feature feature = null;
for(int i=0; i<features.size(); i++)
{
if(features.elementAt(i).isForwardFeature() && isFwd)
{
feature = features.elementAt(i);
break;
}
else if(!features.elementAt(i).isForwardFeature() && !isFwd)
{
feature = features.elementAt(i);
int length = vcfView.getEntryGroup().getSequenceEntry().getBases().getLength();
int tmp = start;
start = length - end + 1;
end = length - tmp + 1;
break;
}
}
try
{
selection.setMarkerRange(new MarkerRange(feature.getStrand(), start, end));
}
catch(uk.ac.sanger.artemis.util.OutOfRangeException e)
{
e.printStackTrace();
}
IOUtils.exportFastaByRange(vcfView, selection, false, writer);
return writer;
}
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/**
* Get the FASTA export of the sequences for the features with the given IDs.
* @param ids comma-separated list of feature IDs
* @return the writer that the FASTA export was written to
*/
private StringWriter getFeatureWriter(String ids)
{
  final String[] wantedIds = ids.split(",");
  final FeatureVector allFeatures = vcfView.getEntryGroup().getAllFeatures();
  final FeatureVector matches = new FeatureVector();

  // Walk the features in entry-group order so the selection keeps that
  // order regardless of how the IDs were listed by the caller.
  for(int i=0; i<allFeatures.size(); i++)
  {
    final Feature candidate = allFeatures.elementAt(i);
    for(String wanted : wantedIds)
    {
      if(candidate.getIDString().equals(wanted))
        matches.add(candidate);
    }
  }

  // Export the matching features as FASTA into an in-memory writer.
  final StringWriter out = new StringWriter();
  IOUtils.exportFasta(vcfView, matches, false, out);
  return out;
}
/**
* Open a flat file and create the components in the TransferAnnotationTool
* used to control the transfer of annotation.
*/
@Before
public void setup()
{
final URL ref = WriteVCFTest.class.getResource("/data/test.embl.gz");
final URL vcf = WriteVCFTest.class.getResource("/data/test.vcf.gz");
List<String> vcfFileList = new Vector<String>();
vcfFileList.add(vcf.getFile());