From 72ef2ef51ccc6fd83ae04b9486252f197443a2dd Mon Sep 17 00:00:00 2001 From: tcarver <tjc> Date: Mon, 9 Sep 2013 09:22:59 +0100 Subject: [PATCH] validation tests --- .../artemis/io/ValidateFeatureTest.java | 340 ++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java diff --git a/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java b/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java new file mode 100644 index 000000000..bbafa43ff --- /dev/null +++ b/test/uk/ac/sanger/artemis/io/ValidateFeatureTest.java @@ -0,0 +1,340 @@ +package uk.ac.sanger.artemis.io; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URL; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.log4j.Level; +import org.junit.Test; + +import junit.framework.Assert; + +import uk.ac.sanger.artemis.EntryGroup; +import uk.ac.sanger.artemis.Options; +import uk.ac.sanger.artemis.SimpleEntryGroup; +import uk.ac.sanger.artemis.components.EntryFileDialog; +import uk.ac.sanger.artemis.components.genebuilder.GeneUtils; +import uk.ac.sanger.artemis.io.DocumentEntryFactory; +import uk.ac.sanger.artemis.io.Entry; +import uk.ac.sanger.artemis.io.EntryInformationException; +import uk.ac.sanger.artemis.io.FeatureVector; +import uk.ac.sanger.artemis.io.GFFStreamFeature; +import uk.ac.sanger.artemis.io.ValidateFeature; +import uk.ac.sanger.artemis.util.Document; +import uk.ac.sanger.artemis.util.DocumentFactory; +import uk.ac.sanger.artemis.util.OutOfRangeException; +import uk.ac.sanger.artemis.sequence.NoSequenceException; + +public class ValidateFeatureTest +{ + @Test + public void testGFF() + { + try + { + final Entry entry = getEntry("/data/test.gff.gz"); + final EntryGroup egrp = new SimpleEntryGroup(); + egrp.add(new uk.ac.sanger.artemis.Entry(entry)); + ValidateFeature validate = new ValidateFeature(egrp); + + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + + assertTrue("Boundary "+id, ValidateFeature.isBoundaryOK(gffFeature) == 0); + assertTrue("Strand "+id, ValidateFeature.isStrandOK(gffFeature)); + assertTrue("CDS phase "+id, ValidateFeature.isCDSPhaseOK(gffFeature)); + assertTrue("Attribute "+id, ValidateFeature.isAttributesOK(gffFeature).length() == 0); + assertTrue("ID check "+id, ValidateFeature.isIdPrefixConsistent(gffFeature)); + assertTrue("Start_range check "+id, ValidateFeature.isPartialConsistent(gffFeature, "Start_range")); + assertTrue("End_range check "+id, ValidateFeature.isPartialConsistent(gffFeature, "End_range")); + + if(ValidateFeature.isPartOfGene(gffFeature)) + assertTrue("Gene model "+id, ValidateFeature.isCompleteGeneModelOK(gffFeature) == 0); + } + + assertTrue("Stop codon", validate.hasValidStop(f)); + assertTrue("Internal stop codon", !validate.isInternalStops(f)); + } + } + catch(OutOfRangeException e) + { + Assert.fail(e.getMessage()); + } + catch(NoSequenceException e) + { + Assert.fail(e.getMessage()); + } + } + + /** + * Test the gene model boundary is consistent + */ + @Test + public void testGFFBoundary() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + if(id.equals("PF3D7_0200100")) + assertTrue("Boundary check: "+id, ValidateFeature.isBoundaryOK(gffFeature) != 0); // boundary not OK + else + assertTrue("Boundary check: "+id, ValidateFeature.isBoundaryOK(gffFeature) == 0); // boundary OK + } + } + } + + /** + * Test the gene model strand is consistent + */ + @Test + public void testGFFStrand() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + if(id.equals("PF3D7_0200300")) + assertTrue("Strand check: "+id, !ValidateFeature.isStrandOK(gffFeature)); // strand not ok + else + assertTrue("Strand check: "+id, ValidateFeature.isStrandOK(gffFeature)); // strand ok + } + } + } + + /** + * Test the CDS has a phase + */ + @Test + public void testGFFPhase() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + if(id.equals("PF3D7_0200200.1:exon{2,1}")) + assertTrue("CDS phase check: "+id, !ValidateFeature.isCDSPhaseOK(gffFeature)); // phase not ok + else + assertTrue("CDS phase check: "+id, ValidateFeature.isCDSPhaseOK(gffFeature)); // phase ok + } + } + } + + /** + * Test the gene models are complete + */ + @Test + public void testGFFCompleteGeneModel() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + if(ValidateFeature.isPartOfGene(gffFeature)) + { + String id = GeneUtils.getUniqueName(gffFeature); + if(id.startsWith("PF3D7_0200500")) + assertTrue("Complete gene model check: "+id, // gene model missing mRNA + ValidateFeature.isCompleteGeneModelOK(gffFeature) != 0); + else + assertTrue("Complete gene model check: "+id, // gene model complete + ValidateFeature.isCompleteGeneModelOK(gffFeature) == 0); + } + } + } + } + + + /** + * Test if the ID GFF3 attribute prefix is consistent within a gene model + */ + @Test + public void testGFFId() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + if(ValidateFeature.isPartOfGene(gffFeature)) + { + String id = GeneUtils.getUniqueName(gffFeature); + assertTrue("Complete gene model check: "+id, + ValidateFeature.isIdPrefixConsistent(gffFeature)); + } + } + } + } + + + /** + * Test if the Start_range and End_range are constistent within a gene model + */ + @Test + public void testGFFPartials() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + if(ValidateFeature.isPartOfGene(gffFeature)) + { + String id = GeneUtils.getUniqueName(gffFeature); + assertTrue("Start_range check: "+id, + ValidateFeature.isPartialConsistent(gffFeature, "Start_range")); + assertTrue("End_range check: "+id, + ValidateFeature.isPartialConsistent(gffFeature, "End_range")); + } + } + } + } + + + /** + * Test if the Start_range and End_range are constistent within a gene model + */ + @Test + public void testGFFAttributes() + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + + for(uk.ac.sanger.artemis.io.Feature f: features) + { + if(ValidateFeature.isGFF(f, null)) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + if(ValidateFeature.isPartOfGene(gffFeature)) + { + String id = GeneUtils.getUniqueName(gffFeature); + assertTrue("Attributes : "+id, + ValidateFeature.isAttributesOK(gffFeature).length() == 0); + } + } + } + } + + + /** + * Test stop codons for genes + */ + @Test + public void testGFFValidStop() + { + try + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + final EntryGroup egrp = new SimpleEntryGroup(); + egrp.add(new uk.ac.sanger.artemis.Entry(entry)); + ValidateFeature validate = new ValidateFeature(egrp); + + for (uk.ac.sanger.artemis.io.Feature f : features) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + if( (id.startsWith("PF3D7_0200100") || id.startsWith("PF3D7_0200500.1:exon:2")) && + f.getKey().getKeyString().equals("CDS")) + assertTrue("Stop codon "+id, !validate.hasValidStop(f)); // not valid + else + assertTrue("Stop codon "+id, validate.hasValidStop(f)); + } + } + catch (OutOfRangeException e) + { + Assert.fail(e.getMessage()); + } + catch (NoSequenceException e) + { + Assert.fail(e.getMessage()); + } + } + + /** + * Test for internal stop codons + */ + @Test + public void testGFFInternalStop() + { + try + { + final Entry entry = getEntry("/data/test_boundary.gff.gz"); + final FeatureVector features = entry.getAllFeatures(); + final EntryGroup egrp = new SimpleEntryGroup(); + egrp.add(new uk.ac.sanger.artemis.Entry(entry)); + ValidateFeature validate = new ValidateFeature(egrp); + + for (uk.ac.sanger.artemis.io.Feature f : features) + { + GFFStreamFeature gffFeature = (GFFStreamFeature)f; + String id = GeneUtils.getUniqueName(gffFeature); + assertTrue("Internal stop codon "+id, !validate.isInternalStops(f)); + } + } + catch (OutOfRangeException e) + { + Assert.fail(e.getMessage()); + } + catch (NoSequenceException e) + { + Assert.fail(e.getMessage()); + } + } + + + private Entry getEntry(final String gff) + { + try + { + URL gffFile = ValidateFeatureTest.class.getResource(gff); + final Document doc = DocumentFactory.makeDocument(gffFile.getFile()); + return DocumentEntryFactory.makeDocumentEntry( + Options.getArtemisEntryInformation(),doc,null); + } + catch(EntryInformationException e) + { + Assert.fail(e.getMessage()); + } + catch(IOException e) + { + Assert.fail(e.getMessage()); + } + return null; + } +} \ No newline at end of file -- GitLab