From 72ef2ef51ccc6fd83ae04b9486252f197443a2dd Mon Sep 17 00:00:00 2001
From: tcarver <tjc>
Date: Mon, 9 Sep 2013 09:22:59 +0100
Subject: [PATCH] validation tests
---
.../artemis/io/ValidateFeatureTest.java | 340 ++++++++++++++++++
1 file changed, 340 insertions(+)
create mode 100644 test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java
diff --git a/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java b/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java
new file mode 100644
index 000000000..bbafa43ff
--- /dev/null
+++ b/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java
@@ -0,0 +1,340 @@
+package uk.ac.sanger.artemis.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.log4j.Level;
+import org.junit.Test;
+
+import junit.framework.Assert;
+
+import uk.ac.sanger.artemis.EntryGroup;
+import uk.ac.sanger.artemis.Options;
+import uk.ac.sanger.artemis.SimpleEntryGroup;
+import uk.ac.sanger.artemis.components.EntryFileDialog;
+import uk.ac.sanger.artemis.components.genebuilder.GeneUtils;
+import uk.ac.sanger.artemis.io.DocumentEntryFactory;
+import uk.ac.sanger.artemis.io.Entry;
+import uk.ac.sanger.artemis.io.EntryInformationException;
+import uk.ac.sanger.artemis.io.FeatureVector;
+import uk.ac.sanger.artemis.io.GFFStreamFeature;
+import uk.ac.sanger.artemis.io.ValidateFeature;
+import uk.ac.sanger.artemis.util.Document;
+import uk.ac.sanger.artemis.util.DocumentFactory;
+import uk.ac.sanger.artemis.util.OutOfRangeException;
+import uk.ac.sanger.artemis.sequence.NoSequenceException;
+
+public class ValidateFeatureTest
+{
+ /**
+  * Run every validation check over the features read from
+  * /data/test.gff.gz and assert that each one passes.
+  * Checked exceptions are declared on the method so that JUnit reports
+  * them with their full stack trace instead of only a (possibly null)
+  * message.
+  * @throws OutOfRangeException reported by JUnit as a test error
+  * @throws NoSequenceException reported by JUnit as a test error
+  */
+ @Test
+ public void testGFF() throws OutOfRangeException, NoSequenceException
+ {
+   final Entry entry = getEntry("/data/test.gff.gz");
+   final EntryGroup egrp = new SimpleEntryGroup();
+   egrp.add(new uk.ac.sanger.artemis.Entry(entry));
+   final ValidateFeature validate = new ValidateFeature(egrp);
+
+   final FeatureVector features = entry.getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature f: features)
+   {
+     // checks that need the GFF view of the feature
+     if(ValidateFeature.isGFF(f, null))
+     {
+       final GFFStreamFeature gffFeature = (GFFStreamFeature)f;
+       final String id = GeneUtils.getUniqueName(gffFeature);
+
+       assertTrue("Boundary "+id, ValidateFeature.isBoundaryOK(gffFeature) == 0);
+       assertTrue("Strand "+id, ValidateFeature.isStrandOK(gffFeature));
+       assertTrue("CDS phase "+id, ValidateFeature.isCDSPhaseOK(gffFeature));
+       assertTrue("Attribute "+id, ValidateFeature.isAttributesOK(gffFeature).length() == 0);
+       assertTrue("ID check "+id, ValidateFeature.isIdPrefixConsistent(gffFeature));
+       assertTrue("Start_range check "+id, ValidateFeature.isPartialConsistent(gffFeature, "Start_range"));
+       assertTrue("End_range check "+id, ValidateFeature.isPartialConsistent(gffFeature, "End_range"));
+
+       if(ValidateFeature.isPartOfGene(gffFeature))
+         assertTrue("Gene model "+id, ValidateFeature.isCompleteGeneModelOK(gffFeature) == 0);
+     }
+
+     // the stop codon checks run on every feature, GFF or not
+     assertTrue("Stop codon", validate.hasValidStop(f));
+     assertTrue("Internal stop codon", !validate.isInternalStops(f));
+   }
+ }
+
+ /**
+  * Test the gene model boundary is consistent. In test_boundary.gff.gz only
+  * the feature named PF3D7_0200100 should get a non-zero isBoundaryOK().
+  */
+ @Test
+ public void testGFFBoundary()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     final String name = GeneUtils.getUniqueName(gff);
+     final boolean expectBad = name.equals("PF3D7_0200100");
+     // a non-zero result is required exactly for the known bad feature
+     assertTrue("Boundary check: "+name, (ValidateFeature.isBoundaryOK(gff) != 0) == expectBad);
+   }
+ }
+
+ /**
+  * Test the gene model strand is consistent. In test_boundary.gff.gz only
+  * the feature named PF3D7_0200300 is expected to fail isStrandOK().
+  */
+ @Test
+ public void testGFFStrand()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     final String name = GeneUtils.getUniqueName(gff);
+     final boolean expectBad = name.equals("PF3D7_0200300");
+     // strand must be OK for every feature except the known bad one
+     assertTrue("Strand check: "+name, ValidateFeature.isStrandOK(gff) != expectBad);
+   }
+ }
+
+ /**
+  * Test the CDS has a phase. In test_boundary.gff.gz only the feature named
+  * PF3D7_0200200.1:exon{2,1} is expected to fail isCDSPhaseOK().
+  */
+ @Test
+ public void testGFFPhase()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     final String name = GeneUtils.getUniqueName(gff);
+     final boolean expectBad = name.equals("PF3D7_0200200.1:exon{2,1}");
+     // phase must be OK for every feature except the known bad one
+     assertTrue("CDS phase check: "+name, ValidateFeature.isCDSPhaseOK(gff) != expectBad);
+   }
+ }
+
+ /**
+  * Test the gene models are complete. In test_boundary.gff.gz only the
+  * features whose unique name starts with PF3D7_0200500 are expected to
+  * be flagged by isCompleteGeneModelOK() (gene model missing mRNA).
+  */
+ @Test
+ public void testGFFCompleteGeneModel()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     if(!ValidateFeature.isPartOfGene(gff))
+       continue;
+
+     final String name = GeneUtils.getUniqueName(gff);
+     final boolean expectIncomplete = name.startsWith("PF3D7_0200500");
+     final boolean incomplete = ValidateFeature.isCompleteGeneModelOK(gff) != 0;
+     // flagged exactly when the name carries the known-incomplete prefix
+     assertTrue("Complete gene model check: "+name, incomplete == expectIncomplete);
+   }
+ }
+
+
+ /**
+  * Test if the ID GFF3 attribute prefix is consistent within a gene model.
+  * Only GFF features that are part of a gene are checked; a failure is
+  * reported with the unique name of the offending feature.
+  */
+ @Test
+ public void testGFFId()
+ {
+   final Entry entry = getEntry("/data/test_boundary.gff.gz");
+   final FeatureVector features = entry.getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature f: features)
+   {
+     if(!ValidateFeature.isGFF(f, null))
+       continue;
+     final GFFStreamFeature gffFeature = (GFFStreamFeature)f;
+     if(!ValidateFeature.isPartOfGene(gffFeature))
+       continue;
+
+     assertTrue("ID prefix check: "+GeneUtils.getUniqueName(gffFeature),
+         ValidateFeature.isIdPrefixConsistent(gffFeature));
+   }
+ }
+
+
+ /**
+  * Test if the Start_range and End_range are consistent within each
+  * gene model read from test_boundary.gff.gz.
+  */
+ @Test
+ public void testGFFPartials()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     if(!ValidateFeature.isPartOfGene(gff))
+       continue;
+
+     final String name = GeneUtils.getUniqueName(gff);
+     final boolean startOK = ValidateFeature.isPartialConsistent(gff, "Start_range");
+     assertTrue("Start_range check: "+name, startOK);
+     final boolean endOK = ValidateFeature.isPartialConsistent(gff, "End_range");
+     assertTrue("End_range check: "+name, endOK);
+   }
+ }
+
+
+ /**
+  * Test that isAttributesOK() returns a zero-length result for every
+  * feature that is part of a gene in test_boundary.gff.gz.
+  */
+ @Test
+ public void testGFFAttributes()
+ {
+   final FeatureVector allFeatures =
+       getEntry("/data/test_boundary.gff.gz").getAllFeatures();
+
+   for(uk.ac.sanger.artemis.io.Feature feature: allFeatures)
+   {
+     if(!ValidateFeature.isGFF(feature, null))
+       continue;
+     final GFFStreamFeature gff = (GFFStreamFeature)feature;
+     if(!ValidateFeature.isPartOfGene(gff))
+       continue;
+
+     final String name = GeneUtils.getUniqueName(gff);
+     final int reportLength = ValidateFeature.isAttributesOK(gff).length();
+     assertTrue("Attributes : "+name, reportLength == 0);
+   }
+ }
+
+
+ /**
+  * Test stop codons for genes.
+  * In test_boundary.gff.gz the CDS features whose unique name starts with
+  * PF3D7_0200100 or PF3D7_0200500.1:exon:2 are expected to fail
+  * hasValidStop(); every other feature is expected to pass.
+  * Checked exceptions are declared on the method so that JUnit reports
+  * them with their full stack trace.
+  * @throws OutOfRangeException reported by JUnit as a test error
+  * @throws NoSequenceException reported by JUnit as a test error
+  */
+ @Test
+ public void testGFFValidStop() throws OutOfRangeException, NoSequenceException
+ {
+   final Entry entry = getEntry("/data/test_boundary.gff.gz");
+   final FeatureVector features = entry.getAllFeatures();
+   final EntryGroup egrp = new SimpleEntryGroup();
+   egrp.add(new uk.ac.sanger.artemis.Entry(entry));
+   final ValidateFeature validate = new ValidateFeature(egrp);
+
+   for(uk.ac.sanger.artemis.io.Feature f : features)
+   {
+     final GFFStreamFeature gffFeature = (GFFStreamFeature)f;
+     final String id = GeneUtils.getUniqueName(gffFeature);
+     // the feature key is only looked at when the ID prefix matches
+     final boolean knownBad =
+         (id.startsWith("PF3D7_0200100") || id.startsWith("PF3D7_0200500.1:exon:2")) &&
+         f.getKey().getKeyString().equals("CDS");
+
+     if(knownBad)
+       assertTrue("Stop codon "+id, !validate.hasValidStop(f)); // not valid
+     else
+       assertTrue("Stop codon "+id, validate.hasValidStop(f));
+   }
+ }
+
+ /**
+  * Test for internal stop codons: no feature read from
+  * test_boundary.gff.gz is expected to report one. The unique name of
+  * each feature is put in the assertion message to identify a failure.
+  * Checked exceptions are declared on the method so that JUnit reports
+  * them with their full stack trace.
+  *
+  * @throws OutOfRangeException reported by JUnit as a test error
+  * @throws NoSequenceException reported by JUnit as a test error
+  */
+ @Test
+ public void testGFFInternalStop() throws OutOfRangeException, NoSequenceException
+ {
+   final Entry entry = getEntry("/data/test_boundary.gff.gz");
+   final FeatureVector features = entry.getAllFeatures();
+
+   // isInternalStops() is an instance method, so build a ValidateFeature on an entry group
+   final EntryGroup egrp = new SimpleEntryGroup();
+   egrp.add(new uk.ac.sanger.artemis.Entry(entry));
+   final ValidateFeature validate = new ValidateFeature(egrp);
+
+   for(uk.ac.sanger.artemis.io.Feature f : features)
+   {
+     // the cast is only needed to look up the unique name for the message
+     final GFFStreamFeature gffFeature = (GFFStreamFeature)f;
+     final String id = GeneUtils.getUniqueName(gffFeature);
+
+     assertTrue("Internal stop codon "+id, !validate.isInternalStops(f));
+   }
+ }
+
+
+ /** Load classpath resource gff as an Entry; the test fails or errors if it cannot be read. */
+ private Entry getEntry(final String gff)
+ {
+   final URL gffFile = ValidateFeatureTest.class.getResource(gff);
+   assertTrue("Test resource not found on classpath: "+gff, gffFile != null);
+   try
+   {
+     final Document doc = DocumentFactory.makeDocument(gffFile.getFile());
+     return DocumentEntryFactory.makeDocumentEntry(Options.getArtemisEntryInformation(),doc,null);
+   }
+   catch(EntryInformationException e)
+   {
+     throw new IllegalStateException("Cannot read "+gff, e); // keep the cause
+   }
+   catch(IOException e)
+   {
+     throw new IllegalStateException("Cannot read "+gff, e); // keep the cause
+   }
+ }
+}
\ No newline at end of file
--
GitLab