diff --git a/.classpath b/.classpath index 3d63d381df73f32044caca209147d811f86d7b60..200d9806da25967234170c1b5c51370cf62683a0 100644 --- a/.classpath +++ b/.classpath @@ -1,6 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> <classpath> <classpathentry kind="src" path="corba"/> + <classpathentry kind="src" path="lib"/> <classpathentry including="nsdb/|seqdb/|type/" kind="src" path="ant-build/src/main"/> <classpathentry excluding="ant-build/src/main/|uk/ac/sanger/artemis/ExternalProgramUtils.java" including="nsdb/|org/|seqdb/|type/|uk/" kind="src" path=""/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> @@ -23,5 +24,6 @@ <classpathentry kind="lib" path="lib/batik/batik-svggen.jar"/> <classpathentry kind="lib" path="lib/batik/batik-util.jar"/> <classpathentry kind="lib" path="lib/batik/batik-xml.jar"/> + <classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/> <classpathentry kind="output" path="ant-build/classes/main"/> </classpath> diff --git a/ChangeLog b/ChangeLog index ac4dd048395daa01284b23dad4492a9d26cf6eb6..612a9bab76c4cd92381aa2e070877f68f7da2c8c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -Version XX +Version 16 Add 'Features Within Selection' option to the 'Select' menu to select features that are contained by a selected base range. diff --git a/Makefile b/Makefile index 8dc99b0bb2a7e42b5d43bea05d46c8bde2f5a72e..8d27e27af0755be37cea8ca51ba6bceb2d6957bb 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ SHELL=/bin/sh JAVAC := javac -source 1.5 -target 1.5 $(OPT_FLAGS) $(EXTRA_FLAGS) -REAL_CLASSPATH := CLASSPATH=lib/biojava.jar:lib/jemAlign.jar:lib/j2ssh/j2ssh-core.jar:lib/ibatis/ibatis-2.3.4.726.jar:lib/ibatis/log4j-1.2.14.jar:lib/postgresql-8.4-701.jdbc3.jar:lib/picard/picard.jar:lib/picard/sam.jar:lib/commons-net-2.2.jar:lib/batik/batik-awt-util.jar:lib/batik/batik-dom.jar:lib/batik/batik-ext.jar:lib/batik/batik-svggen.jar:lib/batik/batik-util.jar:lib/batik/batik-xml.jar:. 
+REAL_CLASSPATH := CLASSPATH=lib/commons-lang-2.6.jar:lib/biojava.jar:lib/jemAlign.jar:lib/j2ssh/j2ssh-core.jar:lib/ibatis/ibatis-2.3.4.726.jar:lib/ibatis/log4j-1.2.14.jar:lib/postgresql-8.4-701.jdbc3.jar:lib/picard/picard.jar:lib/picard/sam.jar:lib/commons-net-2.2.jar:lib/batik/batik-awt-util.jar:lib/batik/batik-dom.jar:lib/batik/batik-ext.jar:lib/batik/batik-svggen.jar:lib/batik/batik-util.jar:lib/batik/batik-xml.jar:. # NAMES:= \ # uk/ac/sanger/artemis/OptionChangeListener \ diff --git a/etc/writedb_entry b/etc/writedb_entry index d747fc495a7097537be2dc50c4345ca6d1d3a054..12e7f55bd481f34ce6093299e205d19c2daf9f82 100755 --- a/etc/writedb_entry +++ b/etc/writedb_entry @@ -1,6 +1,6 @@ #!/bin/sh - -# -# This script reads and writes entries from a database. +# +# This script reads and writes entries from a database. # Examples: # writedb_entry -help # writedb_entry -s Pf3D7_01 Pf3D7_05 Pf3D7_07 @@ -28,6 +28,7 @@ done ARTEMIS_HOME=`dirname "$PRG"`/.. CLASSPATH="$ARTEMIS_HOME:$ARTEMIS_HOME/lib/biojava.jar:$ARTEMIS_HOME/lib/jemAlign.jar:$ARTEMIS_HOME/lib/jakarta-regexp-1.2.jar:$ARTEMIS_HOME/lib/macos.jar:$ARTEMIS_HOME/lib/postgresql-8.4-701.jdbc3.jar:$ARTEMIS_HOME/lib/chado-14-interface.jar:$CLASSPATH" +CLASSPATH="$CLASSPATH:$ARTEMIS_HOME/lib/commons-lang-2.6.jar" # iBatis jars CLASSPATH="$CLASSPATH:$ARTEMIS_HOME/lib/ibatis/ibatis-2.3.4.726.jar:$ARTEMIS_HOME/lib/ibatis/:$ARTEMIS_HOME/lib/ibatis/log4j-1.2.14.jar:$ARTEMIS_HOME/lib/ibatis/cglib-nodep-2.2.jar:$ARTEMIS_HOME/lib/retrotranslator-runtime-1.1.0.jar" @@ -42,7 +43,7 @@ while test $# != 0 case $1 in -Dchado*) DEFAULT_CONNECTION="$1" ;; - -D*) + -D*) FLAGS="$FLAGS $1" ;; *) break ;; esac @@ -55,7 +56,7 @@ for arg in "$@" do if [ '-c' == "${arg}" ]; then let "nextID = $idx + 1"; - DEFAULT_CONNECTION="-Dchado=${!nextID}" + DEFAULT_CONNECTION="-Dchado=${!nextID}" fi if [ '-l' == "${arg}" ]; then let "nextID = $idx + 2"; diff --git a/lib/commons-lang-2.6.jar b/lib/commons-lang-2.6.jar new file mode 100644 index 
0000000000000000000000000000000000000000..98467d3a653ebad776ffa3542efeb9732fe0b482 Binary files /dev/null and b/lib/commons-lang-2.6.jar differ diff --git a/test/uk/ac/sanger/artemis/io/GFF3AttributeBuilderTest.java b/test/uk/ac/sanger/artemis/io/GFF3AttributeBuilderTest.java new file mode 100644 index 0000000000000000000000000000000000000000..e65c526b62b2ffe244eae2f73c48429e7da1187f --- /dev/null +++ b/test/uk/ac/sanger/artemis/io/GFF3AttributeBuilderTest.java @@ -0,0 +1,295 @@ +/* + * This file is part of Artemis + * + * Copyright (C) 2014 Genome Research Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + */ +package uk.ac.sanger.artemis.io; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import junit.framework.Assert; + +import org.junit.Test; +import org.junit.Before; + +import uk.ac.sanger.artemis.io.GFF3AttributeAggregator; +import uk.ac.sanger.artemis.io.GFF3AttributeBuilder; +import uk.ac.sanger.artemis.io.GFF3Encoder; +import uk.ac.sanger.artemis.util.StringVector; + +public class GFF3AttributeBuilderTest { + private GFF3Encoder enc; + private String[] invals = { "foo", "bar" }; + private String[] invalsC = { "foo,bar", "baz,quux" }; + private static GFF3AttributeAggregator testProc; + + @Before + public void setUp() { + enc = new GFF3Encoder(); + testProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + buffer.append(">>" + + GFF3Encoder.encode(values.elementAt(value_index)) + "<<"); + if (value_index < (values.size()) - 1) + buffer.append("|"); + } + } + return buffer.toString(); + } + }; + } + + @Test + /** + * Tests adding one attribute. + */ + public void testAdd1() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo,bar"); + } + + @Test + /** + * Tests adding two different attributes. + */ + public void testAdd2() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr1=foo,bar;attr2=foo,bar"); + } + + @Test + /** + * Tests adding attributes with custom aggregators. 
+ */ + public void testAddWithAggs() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + ab.setAggregator("attr1", testProc); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=>>foo<<|>>bar<<"); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=>>foo<<|>>bar<< >>foo<<|>>bar<<"); + } + + @Test + /** + * Tests adding attributes (encoded values) with custom aggregators. + */ + public void testAddWithAggsCommas() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invalsC); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo%2Cbar,baz%2Cquux"); + ab.add("attr1", in); + assertEquals(ab.toString(), + "attr1=foo%2Cbar,baz%2Cquux foo%2Cbar,baz%2Cquux"); + ab = new GFF3AttributeBuilder(); + ab.setAggregator("attr1", testProc); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=>>foo%2Cbar<<|>>baz%2Cquux<<"); + ab.add("attr1", in); + assertEquals(ab.toString(), + "attr1=>>foo%2Cbar<<|>>baz%2Cquux<< >>foo%2Cbar<<|>>baz%2Cquux<<"); + } + + @Test + /** + * Tests the ignoring of attribute fields in the output. + */ + public void testIgnore() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.ignore("attr1"); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=foo,bar"); + ab.unignore("attr1"); + assertEquals(ab.toString(), "attr1=foo,bar;attr2=foo,bar"); + } + + @Test + /** + * Tests the handling of duplicate attributes. + */ + public void testAddMultiAttr() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.add("attr1", in); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo,bar foo,bar"); + } + + @Test + /** + * Tests the handling of duplicate attributes, with delimiter. 
+ */ + public void testAddMultiAttrWithGlue() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setGlue("attr1", "X"); + ab.add("attr1", in); + ab.add("attr1", in); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo,barXfoo,barXfoo,bar"); + } + + @Test + /** + * Tests cloning of attributes to separate keys with default aggregator. + */ + public void testClone() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setClone("attr1", "brand_new"); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo,bar;brand_new=foo,bar"); + ab.add("brand_new", in); + assertEquals(ab.toString(), "attr1=foo,bar;brand_new=foo,bar foo,bar"); + } + + @Test + /** + * Tests cloning of attributes to separate keys with different aggregators. + */ + public void testCloneWithAggs() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + ab.setClone("attr1", "brand_new"); + ab.setAggregator("brand_new", testProc); + ab.add("attr1", in); + assertEquals(ab.toString(), "attr1=foo,bar;brand_new=>>foo<<|>>bar<<"); + ab.add("attr1", in); + assertEquals(ab.toString(), + "attr1=foo,bar foo,bar;brand_new=>>foo<<|>>bar<< >>foo<<|>>bar<<"); + } + + @Test + /** + * Tests mapping/cloning of attributes to separate keys with different aggregators. 
+ */ + public void testMappingAndCloneWithAggs1() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setMapping("attr1", "aaaa"); + ab.setClone("aaaa", "brand_new"); + ab.setAggregator("brand_new", testProc); + ab.add("attr1", in); + assertEquals(ab.toString(), "aaaa=foo,bar;brand_new=>>foo<<|>>bar<<"); + ab.add("attr1", in); + assertEquals(ab.toString(), + "aaaa=foo,bar foo,bar;brand_new=>>foo<<|>>bar<< >>foo<<|>>bar<<"); + } + + @Test + /** + * Tests mapping/cloning of attributes to separate keys with different aggregators. + */ + public void testMappingAndCloneWithAggs2() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setMapping("attr1", "aaaa"); + ab.setClone("attr1", "brand_new"); + ab.setAggregator("brand_new", testProc); + ab.add("attr1", in); + assertEquals(ab.toString(), "aaaa=foo,bar;brand_new=>>foo<<|>>bar<<"); + ab.add("attr1", in); + assertEquals(ab.toString(), + "aaaa=foo,bar foo,bar;brand_new=>>foo<<|>>bar<< >>foo<<|>>bar<<"); + } + + @Test + /** + * Tests mapping one attribute to a new name. + */ + public void testMapping() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setMapping("attr1", "brand_new"); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=foo,bar;brand_new=foo,bar"); + } + + @Test + /** + * Tests mapping one attribute to a new name with custom target aggregator. + */ + public void testMappingWithAggs() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + ab.setMapping("attr1", "brand_new"); + ab.setAggregator("brand_new", testProc); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=foo,bar;brand_new=>>foo<<|>>bar<<"); + } + + @Test + /** + * Tests mapping one attribute to a new name with custom target aggregator. 
+ */ + public void testMappingCollision() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setMapping("attr1", "attr2"); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=foo,bar foo,bar"); + } + + @Test + /** + * Tests mapping one attribute to a new name with custom target aggregator. + */ + public void testMappingCollisionWithAggs() { + GFF3AttributeBuilder ab = new GFF3AttributeBuilder(); + StringVector in = new StringVector(invals); + + ab.setMapping("attr1", "attr2"); + ab.setAggregator("attr2", testProc); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=>>foo<<|>>bar<< >>foo<<|>>bar<<"); + ab = new GFF3AttributeBuilder(); + ab.setMapping("attr1", "attr2"); + ab.setAggregator("attr1", testProc); + ab.add("attr1", in); + ab.add("attr2", in); + assertEquals(ab.toString(), "attr2=>>foo<<|>>bar<< foo,bar"); + } +} diff --git a/test/uk/ac/sanger/artemis/io/GFF3EncoderTest.java b/test/uk/ac/sanger/artemis/io/GFF3EncoderTest.java new file mode 100644 index 0000000000000000000000000000000000000000..56f8a5a4d4dbafc4bf0b9352f0478c0ec803d8a4 --- /dev/null +++ b/test/uk/ac/sanger/artemis/io/GFF3EncoderTest.java @@ -0,0 +1,102 @@ +/* + * This file is part of Artemis + * + * Copyright (C) 2014 Genome Research Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +package uk.ac.sanger.artemis.io; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import junit.framework.Assert; + +import org.junit.Test; +import org.junit.Before; + +import uk.ac.sanger.artemis.io.GFF3Encoder; + +public class GFF3EncoderTest { + private GFF3Encoder enc; + + @Before + public void setUp() { + enc = new GFF3Encoder(); + } + + @Test + /** + * Tests whether there are missing map characters. + */ + public void testMapEqualLength() { + assertEquals(GFF3Encoder.mapChars.length, GFF3Encoder.mappedEscapes.length); + } + + @Test + /** + * Tests whether the mapping is correct according to GFF3 spec. + */ + public void testMapChars() { + assertEquals(enc.encode("%"), "%25"); + assertEquals(enc.decode("%25"), "%"); + assertEquals(enc.encode("&"), "%26"); + assertEquals(enc.decode("%26"), "&"); + assertEquals(enc.encode(","), "%2C"); + assertEquals(enc.decode("%2C"), ","); + assertEquals(enc.encode(";"), "%3B"); + assertEquals(enc.decode("%3B"), ";"); + assertEquals(enc.encode("="), "%3D"); + assertEquals(enc.decode("%3D"), "="); + assertEquals(enc.encode("\t"), "%09"); + assertEquals(enc.decode("%09"), "\t"); + assertEquals(enc.encode("\n"), "%0A"); + assertEquals(enc.decode("%0A"), "\n"); + assertEquals(enc.encode("\r"), "%0D"); + assertEquals(enc.decode("%0D"), "\r"); + } + + @Test + /** + * Tests the decoding of escaped characters in GFF files. 
+ */ + public void testDecode() { + for (int i=0; i < GFF3Encoder.mappedEscapes.length; i++) { + assertEquals(enc.decode("test"+GFF3Encoder.mappedEscapes[i]+"foo"), + "test"+GFF3Encoder.mapChars[i]+"foo"); + assertEquals(enc.decode("test%"+GFF3Encoder.mappedEscapes[i]+"foo"), + "test%"+GFF3Encoder.mapChars[i]+"foo"); + assertEquals(enc.decode("test1"+GFF3Encoder.mappedEscapes[i]+"foo," + + "test2"+GFF3Encoder.mappedEscapes[i]), + "test1"+GFF3Encoder.mapChars[i]+"foo," + + "test2"+GFF3Encoder.mapChars[i]); + } + } + + @Test + /** + * Tests the encoding of escaped characters in GFF files. + */ + public void testEncode() { + for (int i=0; i < GFF3Encoder.mappedEscapes.length; i++) { + assertEquals(enc.encode("test"+GFF3Encoder.mapChars[i]+"foo"), + "test"+GFF3Encoder.mappedEscapes[i]+"foo"); + assertEquals(enc.encode("test%"+GFF3Encoder.mapChars[i]+"foo"), + "test%25"+GFF3Encoder.mappedEscapes[i]+"foo"); + } + } + +} diff --git a/uk/ac/sanger/artemis/chado/ArtemisUtils.java b/uk/ac/sanger/artemis/chado/ArtemisUtils.java index f0eea5f306acbd15af4d5c04e2f7f0e988c8e2bf..277a787c2958517ce3851c4832b6d7ffe40e17d2 100644 --- a/uk/ac/sanger/artemis/chado/ArtemisUtils.java +++ b/uk/ac/sanger/artemis/chado/ArtemisUtils.java @@ -47,6 +47,7 @@ import org.gmod.schema.sequence.FeatureProp; import org.gmod.schema.sequence.FeatureRelationship; import uk.ac.sanger.artemis.io.DatabaseDocumentEntry; +import uk.ac.sanger.artemis.io.GFF3Encoder; import uk.ac.sanger.artemis.io.GFFStreamFeature; import uk.ac.sanger.artemis.util.DatabaseDocument; import uk.ac.sanger.artemis.util.StringVector; @@ -262,7 +263,7 @@ public class ArtemisUtils if(this_fcp.getValue().equals(fcp.getValue())) return true; - if(this_fcp.getValue().equals(GFFStreamFeature.decode(fcp.getValue()))) + if(this_fcp.getValue().equals(GFF3Encoder.decode(fcp.getValue()))) return true; } return false; diff --git a/uk/ac/sanger/artemis/components/genebuilder/GeneUtils.java 
b/uk/ac/sanger/artemis/components/genebuilder/GeneUtils.java index bee966d31a6871bce8f6089e57f9d5445af15eb0..7d8f1607d6efa7ded64018cab65a1c165fbf2ac7 100644 --- a/uk/ac/sanger/artemis/components/genebuilder/GeneUtils.java +++ b/uk/ac/sanger/artemis/components/genebuilder/GeneUtils.java @@ -61,6 +61,7 @@ import uk.ac.sanger.artemis.io.DatabaseInferredFeature; import uk.ac.sanger.artemis.io.DocumentEntry; import uk.ac.sanger.artemis.io.EntryInformationException; import uk.ac.sanger.artemis.io.Feature; +import uk.ac.sanger.artemis.io.GFF3Encoder; import uk.ac.sanger.artemis.io.GFFDocumentEntry; import uk.ac.sanger.artemis.io.GFFStreamFeature; import uk.ac.sanger.artemis.io.InvalidRelationException; @@ -126,7 +127,7 @@ public class GeneUtils String value = featureSynonym.getSynonym().getName(); if(!featureSynonym.isCurrent()) - value.concat(GFFStreamFeature.encode(";current=false")); + value.concat(GFF3Encoder.encode(";current=false")); Qualifier qualifier = feature.getQualifiers().getQualifierByName(name); if(qualifier == null) @@ -198,7 +199,7 @@ public class GeneUtils final String qualifierString = new String(this_buff.getBytes()); int ind = qualifierString.indexOf('='); final String name = qualifierString.substring(0, ind); - final String value = GFFStreamFeature.decode( + final String value = GFF3Encoder.decode( qualifierString.substring(ind+1, qualifierString.length()-1)); Qualifier qualifier = feature.getQualifiers().getQualifierByName(name); diff --git a/uk/ac/sanger/artemis/io/GFF3AttributeAggregator.java b/uk/ac/sanger/artemis/io/GFF3AttributeAggregator.java new file mode 100644 index 0000000000000000000000000000000000000000..4a8a737a30b5fb2695e0614e7a1dd2166a1293ad --- /dev/null +++ b/uk/ac/sanger/artemis/io/GFF3AttributeAggregator.java @@ -0,0 +1,38 @@ +/* GFF3AttributeAggregator.java + * * + * This file is part of Artemis + * + * Copyright (C) 2014 Genome Research Limited + * + * This program is free software; you can redistribute it and/or + * modify it 
under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +package uk.ac.sanger.artemis.io; + +import uk.ac.sanger.artemis.util.StringVector; + +/** + * Abstraction for processing various Artemis qualifiers into usable GFF3 strings. + */ +public interface GFF3AttributeAggregator { + /** + * Prepare a set of string values as a GFF attribute value. + * @param values the value set to convert to a <code>String</code> + * @return the <code>String</code> representation + */ + public abstract String process(StringVector values); + +} diff --git a/uk/ac/sanger/artemis/io/GFF3AttributeBuilder.java b/uk/ac/sanger/artemis/io/GFF3AttributeBuilder.java new file mode 100644 index 0000000000000000000000000000000000000000..f7b1db25177bdafbdfa4fc48ca6b15ff248ad0d4 --- /dev/null +++ b/uk/ac/sanger/artemis/io/GFF3AttributeBuilder.java @@ -0,0 +1,212 @@ +/* GFF3AttributeBuilder.java + * * + * This file is part of Artemis + * + * Copyright (C) 2014 Genome Research Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +package uk.ac.sanger.artemis.io; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.lang.StringBuilder; + +import uk.ac.sanger.artemis.io.EntryInformationException; +import uk.ac.sanger.artemis.io.GFF3AttributeAggregator; +import uk.ac.sanger.artemis.util.StringVector; + +public class GFF3AttributeBuilder { + private HashMap<String, String> attrs = new HashMap<String, String>(); + private HashMap<String, String> mappings = new HashMap<String, String>(); + private HashMap<String, String> glue = new HashMap<String, String>(); + private HashSet<String> ignores = new HashSet<String>(); + private HashMap<String, String> clones = new HashMap<String, String>(); + private HashMap<String, GFF3AttributeAggregator> aggs = new HashMap<String, GFF3AttributeAggregator>(); + private HashSet<String> reserved = new HashSet<String>( + 13); + private static final GFF3AttributeAggregator defaultAgg = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + final String this_value = GFF3Encoder.encode(values.elementAt(value_index)); + if (value_index > 0 && value_index < (values.size())) { + buffer.append(","); + } + buffer.append(this_value); + } + } + return buffer.toString(); + } + }; + + final String 
reserved_a[] = { "ID", + "Name", "Alias", "Parent", "Derives_from", "Target", "Gap", "Note", + "Dbxref", "Ontology_term", "Start_range", "End_range", "Is_circular" }; + + public GFF3AttributeBuilder() { + for (String s : reserved_a) { + reserved.add(s); + } + } + + public void setMapping(String attr, String mapsto) { + /* XXX: make sure only one level of mapping is used */ + mappings.put(attr, mapsto); + } + + public void unsetMapping(String attr, String mapsto) { + mappings.remove(attr); + } + + public void setGlue(String attr, String glue_str) { + glue.put(attr, glue_str); + } + + public void unsetGlue(String attr) { + glue.remove(attr); + } + + public void setClone(String attr, String glue_str) { + clones.put(attr, glue_str); + } + + public void unsetClone(String attr) { + clones.remove(attr); + } + + public void setAggregator(String attr, GFF3AttributeAggregator agg) { + aggs.put(attr, agg); + } + + public void unsetAggregator(String attr, GFF3AttributeAggregator agg) { + aggs.remove(attr); + } + + public void ignore(String attr) { + ignores.add(attr); + } + + public void unignore(String attr) { + ignores.remove(attr); + } + + public void add(String attr, String val) { + StringVector v = new StringVector(val); + add(attr, v); + } + + public void add(String attr, StringVector val) { + String origAttr = attr; + ArrayList<String> targetAttrs = new ArrayList<String>(); + // expand attributes + if (clones.containsKey(attr)) + targetAttrs.add(clones.get(attr)); + if (mappings.containsKey(attr)) { + attr = mappings.get(attr); + targetAttrs.add(attr); + if (clones.containsKey(attr)) + targetAttrs.add(clones.get(attr)); + } else { + targetAttrs.add(attr); + } + // drop attributes with null or empty values + if (val == null || (val.size() == 1 + && val.elementAt(0).replaceAll("\\s+", "").equals("")) ) + return; + // process expanded list of attributes + for (String this_attr : targetAttrs) { + String aggregatedVal; + // do we have an aggregator for this type? 
+ if (aggs.containsKey(origAttr)) { + GFF3AttributeAggregator agg = aggs.get(origAttr); + aggregatedVal = agg.process(val); + } else if (aggs.containsKey(this_attr)) { + GFF3AttributeAggregator agg = aggs.get(this_attr); + aggregatedVal = agg.process(val); + } else { + aggregatedVal = defaultAgg.process(val); + } + // do not add empty values + if (aggregatedVal == null) + return; + // append or set? + if (attrs.containsKey(this_attr)) { + String this_val = attrs.get(this_attr), + this_glue = " "; + if (glue.containsKey(this_attr)) + this_glue = glue.get(this_attr); + this_val = this_val + this_glue + aggregatedVal; + attrs.put(this_attr, this_val); + } else { + attrs.put(this_attr, aggregatedVal); + } + } + } + + private String decapitalize(String line) { + if (!reserved.contains(line) + && Character.toUpperCase(line.charAt(0)) == line.charAt(0)) { + return Character.toLowerCase(line.charAt(0)) + line.substring(1); + } else { + return line; + } + } + + public String get(String attr) throws EntryInformationException { + if (mappings.containsKey(attr)) { + attr = mappings.get(attr); + } + if (attrs.containsKey(attr)) { + return attrs.get(attr); + } else { + throw new EntryInformationException("empty attribute value for " + attr); + } + } + + private Comparator<String> comparator = new Comparator<String>() { + // make sure 'ID' is always at the beginning of the attribute list + public int compare(String o1, String o2) { + if (o1.equals("ID")) + return -1; + if (o2.equals("ID")) + return 1; + return o1.compareTo(o2); + } + }; + + public String toString() { + StringBuilder b = new StringBuilder(); + int i = 0; + ArrayList<String> sortedAttrs = new ArrayList<String>(attrs.keySet()); + Collections.sort(sortedAttrs, comparator); + for (String key : sortedAttrs) { + if (!ignores.contains(key)) { + String value = attrs.get(key); + if (i++ != 0) + b.append(";"); + b.append(decapitalize(key) + "=" + value); + } + } + return b.toString(); + } +} diff --git 
a/uk/ac/sanger/artemis/io/GFF3Encoder.java b/uk/ac/sanger/artemis/io/GFF3Encoder.java new file mode 100644 index 0000000000000000000000000000000000000000..49f5507a61452ab3eb77a7b16583c7cd2f4b2b6d --- /dev/null +++ b/uk/ac/sanger/artemis/io/GFF3Encoder.java @@ -0,0 +1,38 @@ +/* GFF3Encoder.java + * * + * This file is part of Artemis + * + * Copyright (C) 2014 Genome Research Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + */ + +package uk.ac.sanger.artemis.io; + +import org.apache.commons.lang.StringUtils; + +public class GFF3Encoder { + public static final String mapChars[] = { "%", "&", ",", ";", "=", "\t", "\n", "\r" }; + public static final String mappedEscapes[] = { "%25", "%26", "%2C", "%3B", "%3D", "%09", "%0A", "%0D" }; + + public static String decode(String s) { + return StringUtils.replaceEach(s, mappedEscapes, mapChars); + } + + public static String encode(String s) { + return StringUtils.replaceEach(s, mapChars, mappedEscapes); + } +} diff --git a/uk/ac/sanger/artemis/io/GFFStreamFeature.java b/uk/ac/sanger/artemis/io/GFFStreamFeature.java index 8e68ac3c0d8bbc09c0919b8e72cd5858048fc43b..bfec1da894ca1d501f6e59cf8c971b9984bae9ac 100644 --- a/uk/ac/sanger/artemis/io/GFFStreamFeature.java +++ b/uk/ac/sanger/artemis/io/GFFStreamFeature.java @@ -25,7 +25,6 @@ package uk.ac.sanger.artemis.io; - import java.util.Hashtable; import java.util.HashSet; import java.util.Enumeration; @@ -47,330 +46,273 @@ import uk.ac.sanger.artemis.util.LinePushBackReader; import uk.ac.sanger.artemis.util.OutOfRangeException; import uk.ac.sanger.artemis.util.ReadOnlyException; import uk.ac.sanger.artemis.util.StringVector; - +import uk.ac.sanger.artemis.io.GFF3Encoder; /** - * A StreamFeature that thinks it is a GFF feature. - * @author Kim Rutherford + * A StreamFeature that thinks it is a GFF feature. 
+ * + * @author Kim Rutherford **/ -public class GFFStreamFeature extends SimpleDocumentFeature - implements DocumentFeature, StreamFeature, ComparableFeature -{ +public class GFFStreamFeature extends SimpleDocumentFeature implements + DocumentFeature, StreamFeature, ComparableFeature { - private static org.apache.log4j.Logger logger4j = - org.apache.log4j.Logger.getLogger(GFFStreamFeature.class); + private static org.apache.log4j.Logger logger4j = org.apache.log4j.Logger + .getLogger(GFFStreamFeature.class); /** store for spliced features containing id and range of each segment */ - private Hashtable<String, Range> id_range_store; + private Hashtable<String, Range> id_range_store; /** store a record of the new and old uniquenames that have been changed */ - private Hashtable<String, String> newIdMapToOldId; + private Hashtable<String, String> newIdMapToOldId; /** store the Timestamp for the feature */ - private Timestamp timelastmodified; + private Timestamp timelastmodified; - private ChadoCanonicalGene chadoGene; + private ChadoCanonicalGene chadoGene; - private boolean visible = true; + private boolean visible = true; /** combined feature_relationship.rank store for exons */ - private Hashtable<String, Integer> feature_relationship_rank_store; + private Hashtable<String, Integer> feature_relationship_rank_store; - /** first tabbed parameter */ - private String gffSeqName; + /** first tabbed parameter */ + private String gffSeqName; /** second tabbed parameter */ - private String gffSource; + private String gffSource; /** duplication count */ - private short duplicate = 0; + private short duplicate = 0; protected static Hashtable<String, Range> contig_ranges; - private boolean lazyLoaded = false; - private org.gmod.schema.sequence.Feature chadoLazyFeature; - private boolean readOnlyFeature = false; - - - private static String MAP_DECODE[][] = { - { " ", "%20" }, // white space - { ",", "%2C" }, // comma - { ";", "%3B" }, // semi-colon - { "=", "%3D" }, // equals 
- { "\t", "%09" }, // tab - { " ", "+" }, // white space - { "+", "%2B" }, - { "(", "%28" }, // left bracket - { ")", "%29" }, // right bracket - { "'", "\"" } - }; - - private static String MAP_ENCODE[][] = { -// { " ", "%20" }, // white space - { ",", "%2C" }, // comma - { ";", "%3B" }, // semi-colon - { "=", "%3D" }, // equals - { "\t", "%09" }, // tab - { "+", "%2B" }, - { " ", "+" }, // white space - { "(", "%28" }, // left bracket - { ")", "%29" }, // right bracket - { "\n", "%5C" } // new-line - }; - - private static Set<String> attrs_to_filter = new HashSet<String>(); + private boolean lazyLoaded = false; + private org.gmod.schema.sequence.Feature chadoLazyFeature; + private boolean readOnlyFeature = false; + + private static Set<String> attrs_to_filter = new HashSet<String>(); /** - * Registers an attribute not to be included in the GFF3 output for - * GFFStreamFeatures - * @param attr The GFF3 attribute to remove + * Registers an attribute not to be included in the GFF3 output for + * GFFStreamFeatures + * + * @param attr + * The GFF3 attribute to remove **/ - public static void removeAttribute(String attr) - { + public static void removeAttribute(String attr) { attrs_to_filter.add(attr); } /** - * Registers an attribute to be included in the GFF3 output for - * GFFStreamFeatures - * @param attr The GFF3 attribute to include + * Registers an attribute to be included in the GFF3 output for + * GFFStreamFeatures + * + * @param attr + * The GFF3 attribute to include **/ - public static void includeAttribute(String attr) - { + public static void includeAttribute(String attr) { attrs_to_filter.remove(attr); } /** - * Create a new GFFStreamFeature object. The feature should be added - * to an Entry (with Entry.add()). - * @param key The new feature key - * @param location The Location object for the new feature - * @param qualifiers The qualifiers for the new feature + * Create a new GFFStreamFeature object. 
The feature should be added to an + * Entry (with Entry.add()). + * + * @param key + * The new feature key + * @param location + * The Location object for the new feature + * @param qualifiers + * The qualifiers for the new feature **/ public GFFStreamFeature(final Key key, final Location location, - final QualifierVector qualifiers) - { + final QualifierVector qualifiers) { super(null); - try - { + try { setKey(key); setLocation(location); setQualifiers(qualifiers); - if(getQualifierByName("ID") == null) - { + if (getQualifierByName("ID") == null) { String idStr = null; StringVector v = Options.getOptions().getSystematicQualifierNames(); - for(int i=0; i<v.size(); i++) - { - final String sysName = (String)v.get(i); - if(getQualifierByName(sysName) != null) - { - idStr = (String)getQualifierByName(sysName).getValues().get(0); + for (int i = 0; i < v.size(); i++) { + final String sysName = (String) v.get(i); + if (getQualifierByName(sysName) != null) { + idStr = (String) getQualifierByName(sysName).getValues().get(0); break; } } // autogenerate ID - if(idStr == null) - idStr = key.getKeyString()+":"+location.toString(); + if (idStr == null) + idStr = key.getKeyString() + ":" + location.toString(); setQualifier(new Qualifier("ID", idStr)); } - } - catch(EntryInformationException e) - { + } catch (EntryInformationException e) { // this should never happen because the feature will not be in an Entry throw new Error("internal error - unexpected exception: " + e); - } - catch(ReadOnlyException e) - { + } catch (ReadOnlyException e) { // this should never happen because the feature will not be in an Entry throw new Error("internal error - unexpected exception: " + e); - } - catch(OutOfRangeException e) - { + } catch (OutOfRangeException e) { // this should never happen because the feature will not be in an Entry throw new Error("internal error - unexpected exception: " + e); } } - public GFFStreamFeature(final Feature feature) - { + public GFFStreamFeature(final Feature 
feature) { this(feature, false); } /** - * Create a new GFFStreamFeature with the same key, location and - * qualifiers as the given feature. The feature should be added to an - * Entry (with Entry.add()). - * @param feature The feature to copy. + * Create a new GFFStreamFeature with the same key, location and qualifiers as + * the given feature. The feature should be added to an Entry (with + * Entry.add()). + * + * @param feature + * The feature to copy. **/ - public GFFStreamFeature(final Feature feature, final boolean isDuplicatedInChado) - { + @SuppressWarnings("unchecked") + public GFFStreamFeature(final Feature feature, + final boolean isDuplicatedInChado) { this(feature.getKey(), feature.getLocation(), feature.getQualifiers()); - if(feature instanceof GFFStreamFeature) - { - if(((GFFStreamFeature)feature).id_range_store != null) - this.id_range_store = - (Hashtable)(((GFFStreamFeature)feature).id_range_store).clone(); + if (feature instanceof GFFStreamFeature) { + if (((GFFStreamFeature) feature).id_range_store != null) + this.id_range_store = (Hashtable<String, Range>) (((GFFStreamFeature) feature).id_range_store) + .clone(); - if(((GFFStreamFeature)feature).feature_relationship_rank_store != null) - this.feature_relationship_rank_store = - (Hashtable)(((GFFStreamFeature)feature).feature_relationship_rank_store).clone(); + if (((GFFStreamFeature) feature).feature_relationship_rank_store != null) + this.feature_relationship_rank_store = (Hashtable<String, Integer>) (((GFFStreamFeature) feature).feature_relationship_rank_store) + .clone(); - this.setGffSeqName(((GFFStreamFeature)feature).getGffSeqName()); - this.setGffSource(((GFFStreamFeature)feature).getGffSource()); + this.setGffSeqName(((GFFStreamFeature) feature).getGffSeqName()); + this.setGffSource(((GFFStreamFeature) feature).getGffSource()); - if(isDuplicatedInChado) - { - try - { + if (isDuplicatedInChado) { + try { final String uniquename; final String duplicatePrefix; - if(feature instanceof 
GFFStreamFeature) - { - ((GFFStreamFeature)feature).duplicate++; - duplicatePrefix = "DUP"+Short.toString(((GFFStreamFeature)feature).duplicate)+"-"; - } - else + if (feature instanceof GFFStreamFeature) { + ((GFFStreamFeature) feature).duplicate++; + duplicatePrefix = "DUP" + + Short.toString(((GFFStreamFeature) feature).duplicate) + "-"; + } else duplicatePrefix = "DUP"; - if(id_range_store != null) - { - final Hashtable<String, Range> new_id_range_store = new Hashtable<String, Range>(id_range_store.size()); + if (id_range_store != null) { + final Hashtable<String, Range> new_id_range_store = new Hashtable<String, Range>( + id_range_store.size()); final Enumeration<String> enumIdRangeStore = id_range_store.keys(); - while(enumIdRangeStore.hasMoreElements()) - { + while (enumIdRangeStore.hasMoreElements()) { final String keyId = enumIdRangeStore.nextElement(); - final Range range = id_range_store.get(keyId); - new_id_range_store.put(duplicatePrefix+keyId, range); + final Range range = id_range_store.get(keyId); + new_id_range_store.put(duplicatePrefix + keyId, range); } id_range_store.clear(); - this.id_range_store = (Hashtable) new_id_range_store.clone(); - + this.id_range_store = (Hashtable<String, Range>) new_id_range_store + .clone(); - if(getLocation().getRanges().size() > 1) + if (getLocation().getRanges().size() > 1) uniquename = getSegmentID(getLocation().getRanges()); - else - { - if( ((String)getQualifierByName("ID").getValues().get(0)).endsWith("}") ) + else { + if (((String) getQualifierByName("ID").getValues().get(0)) + .endsWith("}")) uniquename = id_range_store.keys().nextElement(); else - uniquename = duplicatePrefix+ (String)getQualifierByName("ID").getValues().get(0); + uniquename = duplicatePrefix + + (String) getQualifierByName("ID").getValues().get(0); } - } - else - uniquename = duplicatePrefix+ (String)getQualifierByName("ID").getValues().get(0); + } else + uniquename = duplicatePrefix + + (String) 
getQualifierByName("ID").getValues().get(0); setQualifier(new Qualifier("ID", uniquename)); - if(getQualifierByName("Parent") != null) - { - final String parent = - (String) getQualifierByName("Parent").getValues().get(0); - setQualifier(new Qualifier("Parent", duplicatePrefix+parent)); + if (getQualifierByName("Parent") != null) { + final String parent = (String) getQualifierByName("Parent") + .getValues().get(0); + setQualifier(new Qualifier("Parent", duplicatePrefix + parent)); } - if(getQualifierByName("Derives_from") != null) - { - final String derives_from = - (String) getQualifierByName("Derives_from").getValues().get(0); - setQualifier(new Qualifier("Derives_from", duplicatePrefix+derives_from)); + if (getQualifierByName("Derives_from") != null) { + final String derives_from = (String) getQualifierByName( + "Derives_from").getValues().get(0); + setQualifier(new Qualifier("Derives_from", duplicatePrefix + + derives_from)); } // remove qualifiers that don't get transferred to duplicate - final String removeQualifierNames[] = - { "feature_id", - "timelastmodified", - "feature_relationship_rank", - ProteinMapPanel.POLYPEPTIDE_DOMAIN, - ProteinMapPanel.TMHMM[0], - ProteinMapPanel.TMHMM[1], - ProteinMapPanel.TMHMM[2], - ProteinMapPanel.TMHMM[3], - MatchPanel.ORTHOLOG, - MatchPanel.ORTHOLOG - }; - - for(int i=0;i<removeQualifierNames.length; i++) + final String removeQualifierNames[] = { "feature_id", + "timelastmodified", "feature_relationship_rank", + ProteinMapPanel.POLYPEPTIDE_DOMAIN, ProteinMapPanel.TMHMM[0], + ProteinMapPanel.TMHMM[1], ProteinMapPanel.TMHMM[2], + ProteinMapPanel.TMHMM[3], MatchPanel.ORTHOLOG, + MatchPanel.ORTHOLOG }; + + for (int i = 0; i < removeQualifierNames.length; i++) removeQualifierByName(removeQualifierNames[i]); + } catch (ReadOnlyException e) { + } catch (EntryInformationException e) { } - catch(ReadOnlyException e){} - catch(EntryInformationException e){} - } - else - { - chadoGene = ((GFFStreamFeature)feature).chadoGene; + } else 
{ + chadoGene = ((GFFStreamFeature) feature).chadoGene; } } } /** - * Create a new GFFStreamFeature from the given line. The String should be - * in gene finder format. + * Create a new GFFStreamFeature from the given line. The String should be in + * gene finder format. **/ - public GFFStreamFeature(final String line) - throws ReadFormatException - { + public GFFStreamFeature(final String line) throws ReadFormatException { super(null); final StringVector line_bits = StringVector.getStrings(line, "\t", true); - if(line_bits.size() < 8) - throw new ReadFormatException("invalid GFF line: 8 fields needed " + - "(got " + line_bits.size () + - " fields) from: " + line); + if (line_bits.size() < 8) + throw new ReadFormatException("invalid GFF line: 8 fields needed " + + "(got " + line_bits.size() + " fields) from: " + line); final String start_base_str = line_bits.elementAt(3).trim(); - final String end_base_str = line_bits.elementAt(4).trim(); + final String end_base_str = line_bits.elementAt(4).trim(); final int start_base; final int end_base; - try - { + try { start_base = Integer.parseInt(start_base_str); - end_base = Integer.parseInt(end_base_str); - } - catch(NumberFormatException e) - { - throw new ReadFormatException("Could not understand the start or end base " + - "of a GFF feature: " + start_base_str + - " " + end_base_str); + end_base = Integer.parseInt(end_base_str); + } catch (NumberFormatException e) { + throw new ReadFormatException( + "Could not understand the start or end base " + "of a GFF feature: " + + start_base_str + " " + end_base_str); } // start of qualifier parsing and setting - try - { + try { final boolean complement_flag; - if(line_bits.elementAt(6).equals("+")) + if (line_bits.elementAt(6).equals("+")) complement_flag = false; - else if(line_bits.elementAt(6).equals("-")) + else if (line_bits.elementAt(6).equals("-")) complement_flag = true; - else - { + else { // must be unstranded complement_flag = false; } - if(line_bits.size() == 9) - { 
+ if (line_bits.size() == 9) { final String rest_of_line = line_bits.elementAt(8); final Hashtable<String, StringVector> attributes = parseAttributes(rest_of_line); - for(final Enumeration<String> attribute_enum = attributes.keys(); - attribute_enum.hasMoreElements();) - { + for (final Enumeration<String> attribute_enum = attributes.keys(); attribute_enum + .hasMoreElements();) { String name = attribute_enum.nextElement(); final StringVector values = attributes.get(name); - if(MatchPanel.isClusterTag(name)) - { + if (MatchPanel.isClusterTag(name)) { List<ClusterLazyQualifierValue> lazyValues = new Vector<ClusterLazyQualifierValue>(); - for(int i=0; i<values.size(); i++) - lazyValues.add( - new ClusterLazyQualifierValue( (String)values.get(i), name, - this )); + for (int i = 0; i < values.size(); i++) + lazyValues.add(new ClusterLazyQualifierValue((String) values + .get(i), name, this)); setQualifier(new QualifierLazyLoading(name, lazyValues)); - } - else - { - if(values.size() == 0) + } else { + if (values.size() == 0) setQualifier(new Qualifier(name)); else setQualifier(new Qualifier(name, values)); @@ -378,200 +320,173 @@ public class GFFStreamFeature extends SimpleDocumentFeature } } - if( !line_bits.elementAt(0).equals("null") ) - setGffSeqName( decode(line_bits.elementAt(0)) ); + if (!line_bits.elementAt(0).equals("null")) + setGffSeqName(GFF3Encoder.decode(line_bits.elementAt(0))); setKey(new Key(line_bits.elementAt(2))); setGffSource(line_bits.elementAt(1)); - if( !line_bits.elementAt(5).equals(".") ) - { - final Qualifier score_qualifier = - new Qualifier("score", line_bits.elementAt(5)); + if (!line_bits.elementAt(5).equals(".")) { + final Qualifier score_qualifier = new Qualifier("score", + line_bits.elementAt(5)); setQualifier(score_qualifier); } String frame = line_bits.elementAt(7); - if(frame.equals ("0")) + if (frame.equals("0")) frame = "1"; - else if(frame.equals("1")) + else if (frame.equals("1")) frame = "2"; - else if(frame.equals("2")) + else 
if (frame.equals("2")) frame = "3"; else frame = "."; - if(!frame.equals(".")) - { - final Qualifier codon_start_qualifier = - new Qualifier("codon_start", frame); + if (!frame.equals(".")) { + final Qualifier codon_start_qualifier = new Qualifier("codon_start", + frame); setQualifier(codon_start_qualifier); } - if(start_base > end_base) - throw new ReadFormatException("start position is greater than end " + - "position: " + start_base + " > " + - end_base+"\n"+line); + if (start_base > end_base) + throw new ReadFormatException("start position is greater than end " + + "position: " + start_base + " > " + end_base + "\n" + line); - if(start_base < 0) - throw new ReadFormatException("start position must be positive: " + - start_base); + if (start_base < 0) + throw new ReadFormatException("start position must be positive: " + + start_base); final Range location_range = new Range(start_base, end_base); final RangeVector location_ranges = new RangeVector(location_range); setLocation(new Location(location_ranges, complement_flag)); - } - catch(ReadOnlyException e) - { + } catch (ReadOnlyException e) { throw new Error("internal error - unexpected exception: " + e); - } - catch(EntryInformationException e) - { + } catch (EntryInformationException e) { throw new Error("internal error - unexpected exception: " + e); - } - catch(OutOfRangeException e) - { + } catch (OutOfRangeException e) { throw new Error("internal error - unexpected exception: " + e); } - //this.gff_lines = new StringVector(line); + // this.gff_lines = new StringVector(line); } /** - * - * Store for spliced regions of segments ID's and ranges. - * - */ - public void setSegmentRangeStore(Hashtable<String, Range> id_range_store) - { + * + * Store for spliced regions of segments ID's and ranges. 
+ * + */ + public void setSegmentRangeStore(Hashtable<String, Range> id_range_store) { this.id_range_store = id_range_store; } - public Hashtable<String, Range> getSegmentRangeStore() - { - if(id_range_store == null) - { + public Hashtable<String, Range> getSegmentRangeStore() { + if (id_range_store == null) { id_range_store = new Hashtable<String, Range>(); - id_range_store.put((String)this.getQualifierByName("ID").getValues().get(0), - this.getLocation().getTotalRange()); + id_range_store.put((String) this.getQualifierByName("ID").getValues() + .get(0), this.getLocation().getTotalRange()); } return id_range_store; } - public Hashtable<String, String> getNewIdMapToOldId() - { + public Hashtable<String, String> getNewIdMapToOldId() { return newIdMapToOldId; } /** * Used when changing spliced feature uniquenames + * * @param newIdMapToOldId */ - public void setNewIdMapToOldId(Hashtable<String, String> newIdMapToOldId) - { + public void setNewIdMapToOldId(Hashtable<String, String> newIdMapToOldId) { this.newIdMapToOldId = newIdMapToOldId; } /** * Store for ID's and CHADO feature_relationship.rank + * * @param feature_relationship_rank_store */ public void setFeature_relationship_rank_store( - Hashtable<String, Integer> feature_relationship_rank_store) - { + Hashtable<String, Integer> feature_relationship_rank_store) { this.feature_relationship_rank_store = feature_relationship_rank_store; } /** * Store for ID's and CHADO feature_relationship.rank + * * @return */ - public Hashtable<String, Integer> getFeature_relationship_rank_store() - { + public Hashtable<String, Integer> getFeature_relationship_rank_store() { return feature_relationship_rank_store; } - /** * Get the chado uniquename + * * @param r * @return */ - public String getSegmentID(final Range r) - { - if(id_range_store != null) - { + public String getSegmentID(final Range r) { + if (id_range_store != null) { int offset = 0; - if(getGffSeqName() != null && contig_ranges != null && - 
contig_ranges.containsKey(getGffSeqName())) - { + if (getGffSeqName() != null && contig_ranges != null + && contig_ranges.containsKey(getGffSeqName())) { // adjust for coordinates in multi-sequence GFF Range offset_range = contig_ranges.get(getGffSeqName()); - offset = offset_range.getStart()-1; + offset = offset_range.getStart() - 1; } Enumeration<String> enum_ranges = id_range_store.keys(); - while(enum_ranges.hasMoreElements()) - { - String key = enum_ranges.nextElement(); + while (enum_ranges.hasMoreElements()) { + String key = enum_ranges.nextElement(); Range range = id_range_store.get(key); - if(range.getStart() == r.getStart()-offset && - range.getEnd() == r.getEnd()-offset) + if (range.getStart() == r.getStart() - offset + && range.getEnd() == r.getEnd() - offset) return key; } - } - else if (getQualifierByName("ID") != null) - { - return (String)getQualifierByName("ID").getValues().get(0); + } else if (getQualifierByName("ID") != null) { + return (String) getQualifierByName("ID").getValues().get(0); } - logger4j.warn("RANGE NOT FOUND "+r.toString()); + logger4j.warn("RANGE NOT FOUND " + r.toString()); return null; } /** - * Get the feature ID based on the segments chado - * uniquename's. + * Get the feature ID based on the segments chado uniquename's. 
+ * * @param rv * @return */ - public String getSegmentID(final RangeVector rv) - { + public String getSegmentID(final RangeVector rv) { String id = ""; - if(id_range_store != null) - { + if (id_range_store != null) { String id_new; Range range; int index; - for(int i=0; i<rv.size(); i++) - { - range = (Range)rv.get(i); + for (int i = 0; i < rv.size(); i++) { + range = (Range) rv.get(i); id_new = getSegmentID(range); String prefix[] = getPrefix(id_new, ':'); - if(prefix[0] != null) - { + if (prefix[0] != null) { index = id.indexOf(prefix[0]); - if(id.equals("") || index < 0) - { - if(!id.equals("")) - id = id +","; - id = id+prefix[0] + "{" + prefix[1] + "}"; + if (id.equals("") || index < 0) { + if (!id.equals("")) + id = id + ","; + id = id + prefix[0] + "{" + prefix[1] + "}"; continue; } index = id.indexOf('}', index); - id = id.substring(0,index) + "," + - prefix[1] + id.substring(index); - } - else if(id_new != null) - { - if(!id.equals("")) - id = id +","; - id = id+id_new; + id = id.substring(0, index) + "," + prefix[1] + id.substring(index); + } else if (id_new != null) { + if (!id.equals("")) + id = id + ","; + id = id + id_new; } } } @@ -580,204 +495,149 @@ public class GFFStreamFeature extends SimpleDocumentFeature } /** - * Get the ID prefix, e.g. for SPAC1556.06.1:exon:2 - * returns SPAC1556.06.1:exon as the prefix and 2 as the - * index. + * Get the ID prefix, e.g. for SPAC1556.06.1:exon:2 returns SPAC1556.06.1:exon + * as the prefix and 2 as the index. 
+ * * @param id * @return */ - public String[] getPrefix(final String id, - final char separator) - { + public String[] getPrefix(final String id, final char separator) { String prefix[] = new String[2]; int index = id.lastIndexOf(separator); - if(index > -1) - { - prefix[0] = id.substring(0,index); - prefix[1] = id.substring(index+1); + if (index > -1) { + prefix[0] = id.substring(0, index); + prefix[1] = id.substring(index + 1); } return prefix; } /** * Used to automatically generate + * * @param prefix * @return */ - public int getAutoNumber(final String prefix, - final char separator) - { - int auto = 1; + public int getAutoNumber(final String prefix, final char separator) { + int auto = 1; String val = prefix + separator + auto; - while(id_range_store.containsKey(val)) - { + while (id_range_store.containsKey(val)) { auto++; val = prefix + separator + auto; } return auto; } - - /** - * For gff-version 3: - * http://www.sequenceontology.org/gff3.shtml - * Remove URL escaping rule (e.g. space="%20" or "+") - */ - public static String decode(String s) - { - int ind; - String enc; - String dec; - - for(int i=0; i<MAP_DECODE.length; i++) - { - enc = MAP_DECODE[i][1]; - dec = MAP_DECODE[i][0]; - while( (ind = s.indexOf(enc)) > -1) - s = s.substring(0,ind) + dec + s.substring(ind+enc.length()); - } - return s; - } - - - /** - * For gff-version 3: - * http://www.sequenceontology.org/gff3.shtml - * Add URL escaping rule (e.g. space="%20" or "+") - */ - public static String encode(String s) - { - int ind; - String enc; - String dec; - - for(int i=0; i<MAP_ENCODE.length; i++) - { - enc = MAP_ENCODE[i][1]; - dec = MAP_ENCODE[i][0]; - while( (ind = s.indexOf(dec)) > -1 ) - s = s.substring(0,ind) + enc + s.substring(ind+1); - } - return s; - } - - /** - * Return the reference of a new copy of this Feature. + * Return the reference of a new copy of this Feature. 
**/ - public Feature copy() - { + public Feature copy() { final Feature return_value = new GFFStreamFeature(this); return return_value; } /** - * Read and return a GFFStreamFeature from a stream. A feature must be the - * next thing in the stream. - * @param stream the Feature is read from this stream - * @exception IOException thrown if there is a problem reading the Feature - - * most likely ReadFormatException. - * @exception InvalidRelationException Thrown if this Feature cannot contain - * the given Qualifier. - * @return null if in_stream is at the end of file when the method is - * called + * Read and return a GFFStreamFeature from a stream. A feature must be the + * next thing in the stream. + * + * @param stream + * the Feature is read from this stream + * @exception IOException + * thrown if there is a problem reading the Feature - most likely + * ReadFormatException. + * @exception InvalidRelationException + * Thrown if this Feature cannot contain the given Qualifier. + * @return null if in_stream is at the end of file when the method is called */ protected static GFFStreamFeature readFromStream(LinePushBackReader stream) - throws IOException, InvalidRelationException - { + throws IOException, InvalidRelationException { final String line = stream.readLine(); - if(line == null) + if (line == null) return null; - try - { + try { return new GFFStreamFeature(line); - } - catch(ReadFormatException exception) - { + } catch (ReadFormatException exception) { // re-throw the exception with the line number added final String new_error_string = exception.getMessage(); - throw new ReadFormatException(new_error_string, - stream.getLineNumber()); + throw new ReadFormatException(new_error_string, stream.getLineNumber()); } } /** - * Read the details of a feature from an EMBL stream into the current - * object. - * @param entry_information The EntryInformation object of the Entry that - * will contain the Feature. 
- * @param in_stream the Feature is read from this stream - * @exception IOException thrown if there is a problem reading the Feature - - * most likely ReadFormatException if the stream does not contain GFF - * feature. + * Read the details of a feature from an EMBL stream into the current object. + * + * @param entry_information + * The EntryInformation object of the Entry that will contain the + * Feature. + * @param in_stream + * the Feature is read from this stream + * @exception IOException + * thrown if there is a problem reading the Feature - most likely + * ReadFormatException if the stream does not contain GFF + * feature. **/ public void setFromStream(final EntryInformation entry_information, - final LinePushBackReader in_stream) - throws IOException, InvalidRelationException, ReadOnlyException - { + final LinePushBackReader in_stream) throws IOException, + InvalidRelationException, ReadOnlyException { throw new ReadOnlyException(); } /** - * Write this Feature to the given stream. - * @param writer The stream to write to. - * @exception IOException thrown if there is an io problem while writing - * the Feature. + * Write this Feature to the given stream. + * + * @param writer + * The stream to write to. + * @exception IOException + * thrown if there is an io problem while writing the Feature. 
**/ - public void writeToStream(final Writer writer) - throws IOException - { + public void writeToStream(final Writer writer) throws IOException { final RangeVector ranges = getLocation().getRanges(); final int ranges_size = ranges.size(); -// final Hashtable contig_ranges = SimpleDocumentEntry.getContigRanges(); - for(int i = 0; i < ranges_size; ++i) - { - Range this_range = (Range)ranges.elementAt(i); + // final Hashtable contig_ranges = SimpleDocumentEntry.getContigRanges(); + for (int i = 0; i < ranges_size; ++i) { + Range this_range = (Range) ranges.elementAt(i); String seqname = getGffSeqName(); - String source = getGffSource(); - Qualifier score = getQualifierByName("score"); - Qualifier group = getQualifierByName("group"); + String source = getGffSource(); + Qualifier score = getQualifierByName("score"); + Qualifier group = getQualifierByName("group"); // source becomes a Dbxref in chado String source_str = null; - if(getQualifierByName("Dbxref") != null) - { + if (getQualifierByName("Dbxref") != null) { source_str = getDbxrefGFFSource(getQualifierByName("Dbxref")); } int start = this_range.getStart(); - int end = this_range.getEnd(); + int end = this_range.getEnd(); - if(seqname == null && ((GFFDocumentEntry)getEntry()).getDocument() != null) - seqname = ((GFFDocumentEntry)getEntry()).getDocument().getName(); - if(seqname == null) + if (seqname == null + && ((GFFDocumentEntry) getEntry()).getDocument() != null) + seqname = ((GFFDocumentEntry) getEntry()).getDocument().getName(); + if (seqname == null) seqname = deriveSeqName(start); - if(source == null) + if (source == null) source = "artemis"; - if(score == null) + if (score == null) score = new Qualifier("score", "."); - if(seqname != null && contig_ranges != null && - contig_ranges.containsKey(seqname)) - { + if (seqname != null && contig_ranges != null + && contig_ranges.containsKey(seqname)) { Range offset_range = contig_ranges.get(seqname); - start = start-offset_range.getStart()+1; - end = 
end-offset_range.getStart()+1; + start = start - offset_range.getStart() + 1; + end = end - offset_range.getStart() + 1; } - if(group == null || group.getValues() == null || - group.getValues().elementAt(0).equals("")) - { + if (group == null || group.getValues() == null + || group.getValues().elementAt(0).equals("")) { final Qualifier gene = getQualifierByName("gene"); - if(gene == null) + if (gene == null) group = new Qualifier("group", ""); else group = gene; @@ -786,333 +646,358 @@ public class GFFStreamFeature extends SimpleDocumentFeature String frame = "."; final Qualifier codon_start = getQualifierByName("codon_start"); - if(codon_start != null) - { - frame = (String)(codon_start.getValues()).elementAt(0); + if (codon_start != null) { + frame = (String) (codon_start.getValues()).elementAt(0); - if(frame.equals ("1")) + if (frame.equals("1")) frame = "0"; - else if(frame.equals("2")) + else if (frame.equals("2")) frame = "1"; - else if(frame.equals("3")) + else if (frame.equals("3")) frame = "2"; else frame = "."; } // phase is REQUIRED for all CDS features - if(getKey().equals("CDS") && frame.equals(".")) + if (getKey().equals("CDS") && frame.equals(".")) frame = "0"; final String myId = getSegmentID(this_range); String attribute_string = unParseAttributes(myId); - if(source_str == null && source != null) - source_str = source; + if (source_str == null && source != null) + source_str = source; final String translation = getTranslation(); - if(translation != null) + if (translation != null) attribute_string = attribute_string + ";" + translation; - writer.write(seqname + "\t" + - source_str + "\t" + - getKey().getKeyString() + "\t" + - start + "\t" + - end + "\t" + - score.getValues() .elementAt(0)+ "\t" + - (getLocation().isComplement() ? 
"-\t" : "+\t") + - frame + "\t" + - attribute_string + "\n"); + writer.write(seqname + "\t" + source_str + "\t" + getKey().getKeyString() + + "\t" + start + "\t" + end + "\t" + score.getValues().elementAt(0) + + "\t" + (getLocation().isComplement() ? "-\t" : "+\t") + frame + + "\t" + attribute_string + "\n"); } } /** - * If the seqname is not set for this feature try to derive the contig/chromosome - * it is located on + * If the seqname is not set for this feature try to derive the + * contig/chromosome it is located on + * * @param start * @return */ - private String deriveSeqName(int start) - { + private String deriveSeqName(int start) { String seqname = null; - if(contig_ranges != null) - { + if (contig_ranges != null) { final Enumeration<String> contigEnum = contig_ranges.keys(); - while(contigEnum.hasMoreElements()) - { + while (contigEnum.hasMoreElements()) { final String key = contigEnum.nextElement(); final Range r = contig_ranges.get(key); - if(r.getStart() > start) + if (r.getStart() > start) continue; - if(r.getEnd() > start) + if (r.getEnd() > start) return key; } - } - else - { - try - { - seqname = ((GFFStreamFeature)(getEntry().getAllFeatures().elementAt(0))).getGffSeqName(); + } else { + try { + seqname = ((GFFStreamFeature) (getEntry().getAllFeatures().elementAt(0))) + .getGffSeqName(); + } catch (Exception e) { } - catch(Exception e) {} } - if(seqname == null) + if (seqname == null) seqname = "gff_seqname"; return seqname; } /** - * Return a String containing the qualifiers of this feature in a form - * suitable for using as the last field of a GFF line. The codon_start - * attribute is not included since GFF has a frame field. gff_seqname, - * gff_source and score aren't included since they have corresponding - * fields. + * Return a String containing the qualifiers of this feature in a form + * suitable for using as the last field of a GFF line. 
**/ - private String unParseAttributes(final String myId) - { - final StringBuffer buffer = new StringBuffer(); + private String unParseAttributes(final String myId) { final QualifierVector qualifiers = getQualifiers(); - - final String names[] = { "ID", "Name", "Alias", "Parent", - "Derives_from", - "Target", "Gap", "Note", - "Dbxref", "Ontology_term", - "Start_range", "End_range", - "Is_circular"}; - int count = 0; - final int names_length = names.length; - - if(myId != null) - { - buffer.append("ID="); - buffer.append(encode(myId)); - count++; + GFF3AttributeBuilder abuf = new GFF3AttributeBuilder(); + prepareProcessors(abuf); + + for (String attr : attrs_to_filter) { + abuf.ignore(attr); } - for(int i=1; i<names_length; i++) - { - Qualifier this_qualifier = qualifiers.getQualifierByName(names[i]); + final int names_length = abuf.reserved_a.length; - if(this_qualifier == null) - continue; - - final String this_qualifier_str = getQualifierString(this_qualifier, true); - if(this_qualifier_str == null) + // add ID attribute + if (myId != null) { + abuf.add("ID", myId); + } + + // build reserved attributes + for (int i = 1; i < names_length; i++) { + Qualifier this_qualifier = qualifiers.getQualifierByName(abuf.reserved_a[i]); + + if (this_qualifier == null) continue; - if(count != 0) - buffer.append(";"); - buffer.append(this_qualifier_str); - count++; + abuf.add(this_qualifier.getName(), this_qualifier.getValues()); } + // build remaining attributes boolean lname; - for(Qualifier this_qualifier: qualifiers) - { + for (Qualifier this_qualifier : qualifiers) { lname = false; - for(int j=0; j<names_length; j++) - if(this_qualifier.getName().equals(names[j])) - lname = true; - if(lname) - continue; - - if(attrs_to_filter.contains(this_qualifier.getName())) - continue; - - if( (this_qualifier.getName().equals("private") && System.getProperty("noprivate") != null) || - (this_qualifier.getName().equals("history") && System.getProperty("nohistory") != null) ) + // skip 
reserved names + for (int j = 0; j < names_length; j++) + if (this_qualifier.getName().equals(abuf.reserved_a[j])) + lname = true; + if (lname) continue; - final String this_qualifier_str = getQualifierString(this_qualifier, false); - - if(this_qualifier_str == null) + // skip internal qualifiers + if ((this_qualifier.getName().equals("private") && System + .getProperty("noprivate") != null) + || (this_qualifier.getName().equals("history") && System + .getProperty("nohistory") != null)) continue; - if(count != 0) - buffer.append(";"); - buffer.append(this_qualifier_str); + abuf.add(this_qualifier.getName(), this_qualifier.getValues()); } - return buffer.toString(); + return abuf.toString(); } + void prepareProcessors(GFF3AttributeBuilder abuf) { + GFF3AttributeAggregator productProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + final String this_value; + int index = values.elementAt(value_index).indexOf("term="); + // strip off the 'term=' etc + if (index > -1) + this_value = GFF3Encoder.encode(values.elementAt(value_index) + .substring(index + 5, + values.elementAt(value_index).length() - 1)); + else + this_value = GFF3Encoder.encode(values.elementAt(value_index)); + if (value_index > 0 && value_index < (values.size())) { + buffer.append(","); + } + buffer.append(this_value); + } + } + return buffer.toString(); + } + }; + + GFF3AttributeAggregator ecProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + final String this_value = "EC:" + + GFF3Encoder.encode(values.elementAt(value_index)); + if (value_index > 0 && value_index < (values.size())) { + 
buffer.append(","); + } + buffer.append(this_value); + } + } + return buffer.toString(); + } + }; + + GFF3AttributeAggregator psysIDProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + final String this_value; + int index = values.elementAt(value_index).indexOf(";current="); + if (index > -1) + this_value = GFF3Encoder.encode(values.elementAt(value_index) + .substring(0, index - 1)); + else + this_value = GFF3Encoder.encode(values.elementAt(value_index)); + if (value_index > 0 && value_index < (values.size())) { + buffer.append(","); + } + buffer.append(this_value); + } + } + return buffer.toString(); + } + }; + + GFF3AttributeAggregator classProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + final String this_value; + int index = values.elementAt(value_index).indexOf("::"); + if (index > -1) + this_value = GFF3Encoder.encode(values.elementAt(value_index) + .substring(0, index)); + else + this_value = GFF3Encoder.encode(values.elementAt(value_index)); + if (value_index > 0 && value_index < (values.size())) { + buffer.append(","); + } + buffer.append(this_value); + } + } + return buffer.toString(); + } + }; + + GFF3AttributeAggregator startEndRangeProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + if (value_index > 0 && value_index < (values.size())) { + buffer.append(","); + } + buffer.append(values.elementAt(value_index)); + } + } + 
return buffer.toString(); + } + }; + + GFF3AttributeAggregator goProc = new GFF3AttributeAggregator() { + @Override + public String process(StringVector values) { + StringBuilder buffer = new StringBuilder(); + if (values != null && values.size() > 0) { + for (int value_index = 0; value_index < values.size(); ++value_index) { + int goindex = values.elementAt(value_index).indexOf("GOid="); + int termindex = values.elementAt(value_index).indexOf(";term="); + if (goindex > -1 && termindex > -1) { + buffer.append(GFF3Encoder.encode(values.elementAt(value_index) + .substring(goindex + 5, termindex))); + if (value_index < (values.size()) - 1) + buffer.append(","); + } + } + } + return buffer.toString(); + } + }; + + // map GO -> full_GO + abuf.setMapping("GO", "full_GO"); + abuf.setGlue("full_GO", ","); + + // merge curation and comment + abuf.setMapping("curation", "comment"); + + // also put GOs in Ontology_term + abuf.setClone("full_GO", "Ontology_term"); + abuf.setAggregator("Ontology_term", goProc); + abuf.setGlue("Ontology_term", ","); + + // class + abuf.setAggregator("class", classProc); + + // EC numbers go into Dbxref + abuf.setMapping("EC_number", "Dbxref"); + abuf.setAggregator("EC_number", ecProc); + abuf.setGlue("Dbxref", ","); + + // start/end ranges + abuf.setAggregator("Start_range", startEndRangeProc); + abuf.setAggregator("End_range", startEndRangeProc); + + // previous_systematic_id + abuf.setAggregator("previous_systematic_id", psysIDProc); + + // product + abuf.setAggregator("product", productProc); + } /** * Get the translation qualifier string for polypeptide features. */ - private String getTranslation() - { - if (! 
getKey().getKeyString().equals("polypeptide")) - return null; - if (chadoGene != null) - { - if(getUserData() == null) + private String getTranslation() { + if (!getKey().getKeyString().equals("polypeptide")) + return null; + if (chadoGene != null) { + if (getUserData() == null) new uk.ac.sanger.artemis.Feature(this); - // the above line constructs the appropriate userData within this current GFFStreamFeature object, + // the above line constructs the appropriate userData within this current + // GFFStreamFeature object, // which is required by the following GeneUtils.deriveResidues() String residues = GeneUtils.deriveResidues(this); if (residues != null) - return "translation="+residues; + return "translation=" + residues; } return null; } /** - * Used to write out the GFF attributes. - * @param q the qualifier to represent as a <code>String</code> - * @param reserved indicate if this is one of the reserved tags or not - * @return the <code>String</code> representation - */ - private String getQualifierString(Qualifier q, boolean reserved ) - { - StringBuffer buffer = new StringBuffer(); - final String name = q.getName(); - - if(name.equals("codon_start") || name.equals("gff_source") || - name.equals("gff_seqname") || name.equals("score")) - return null; - - final StringVector values = q.getValues(); - - /* ignore qualifiers with just one empty value, will mess up GFF3 output */ - if(values != null && values.size() == 1) - { - if (values.elementAt(0).replaceAll("\\s+","").equals("")) - return null; - } - - /* - * GSV : - * The Bio::FeatureIO perl module falls over if there are Uppercased - * attribute names for tags which aren't part of the standard reserved - * set. So we lowercase these, since in the specification it says : - * - * "All attributes that begin with an uppercase letter are reserved for - * later use. Attributes that begin with a lowercase letter can be used - * freely by applications." 
- * see http://www.sequenceontology.org/gff3.shtml - */ - String nameToBuffer = encode(name); - - if (! reserved) - nameToBuffer = Character.toLowerCase(nameToBuffer.charAt(0)) + nameToBuffer.substring(1); - buffer.append(nameToBuffer); - - if(values != null && values.size() > 0) - { - buffer.append('='); - for(int value_index = 0; value_index < values.size(); - ++value_index) - { - final String this_value; - if(name.equals("class")) - { - int index = values.elementAt(value_index).indexOf("::"); - if(index > -1) - this_value = encode(values.elementAt(value_index).substring(0,index)); - else - this_value = encode(values.elementAt(value_index)); - } - else - this_value = encode(values.elementAt(value_index)); - - if(value_index>0) - buffer.append("%2C"); - - if(name.equals("Parent")) - buffer.append(this_value); - else - { - try - { - buffer.append(Integer.valueOf(this_value)); - } - catch(NumberFormatException _) - { - // not an integer - try - { - buffer.append(Double.valueOf(this_value)); - } - catch (NumberFormatException __) - { - // not a double or integer so quote it - buffer.append(this_value); - } - } - } - } - } - if (buffer.toString().charAt(buffer.toString().length()-1) == '=') - System.out.println(buffer.toString() + " ----- values length was " + values.size() + ": '" + values.elementAt(0) + "'"); - return buffer.toString(); - } - - /** - * Parse the given String as ACeDB format attributes. - * Adapted from code by Matthew Pocock for the BioJava project. - * - * Modified for gff-version 3. - * @return Return a Hashtable. Each key is an attribute name and each value - * of the Hashtable is a StringVector containing the attribute values. - * If the attribute has no value then the Hashtable value will be a zero - * length vector. + * Parse the given String as ACeDB format attributes. Adapted from code by + * Matthew Pocock for the BioJava project. + * + * Modified for gff-version 3. + * + * @return Return a Hashtable. 
Each key is an attribute name and each value of + * the Hashtable is a StringVector containing the attribute values. If + * the attribute has no value then the Hashtable value will be a zero + * length vector. **/ - private Hashtable<String, StringVector> parseAttributes(final String att_val_list) - { + private Hashtable<String, StringVector> parseAttributes( + final String att_val_list) { final Hashtable<String, StringVector> attr = new Hashtable<String, StringVector>(); int ind_start = 0; int ind_end; - while( (ind_end = att_val_list.indexOf(";",ind_start)) > -1 || - ind_start < att_val_list.length() ) - { - if(ind_end < 0) + while ((ind_end = att_val_list.indexOf(";", ind_start)) > -1 + || ind_start < att_val_list.length()) { + if (ind_end < 0) ind_end = att_val_list.length(); - final String this_token = decode(att_val_list.substring(ind_start, ind_end).trim()); - ind_start = ind_end+1; + final String this_token = GFF3Encoder.decode(att_val_list.substring(ind_start, + ind_end).trim()); + ind_start = ind_end + 1; int index_of_first_space = this_token.indexOf(" "); final String att_name; StringVector att_values = new StringVector(); - if( this_token.indexOf("=") > -1 && - (this_token.indexOf("=") < index_of_first_space || - index_of_first_space == -1) ) - { + if (this_token.indexOf("=") > -1 + && (this_token.indexOf("=") < index_of_first_space || index_of_first_space == -1)) { index_of_first_space = this_token.indexOf("="); att_name = this_token.substring(0, index_of_first_space); - att_values.add(this_token.substring(index_of_first_space+1).trim()); - } - else if(index_of_first_space == -1) + att_values.add(this_token.substring(index_of_first_space + 1).trim()); + } else if (index_of_first_space == -1) att_name = this_token; - else - { + else { att_name = this_token.substring(0, index_of_first_space); - String rest_of_token = - this_token.substring(index_of_first_space+1).trim(); + String rest_of_token = this_token.substring(index_of_first_space + 1) + .trim(); - 
while(rest_of_token.length() > 0) - { - if(rest_of_token.startsWith("\"")) - { + while (rest_of_token.length() > 0) { + if (rest_of_token.startsWith("\"")) { int quote_index = 0; - do - { + do { quote_index++; quote_index = rest_of_token.indexOf("\"", quote_index); - } while(quote_index > -1 && - rest_of_token.charAt(quote_index - 1) == '\\'); + } while (quote_index > -1 + && rest_of_token.charAt(quote_index - 1) == '\\'); - if(quote_index < 0) - { + if (quote_index < 0) { // no closing quote - panic - final Hashtable<String, StringVector> panic_attributes = - new Hashtable<String, StringVector>(); + final Hashtable<String, StringVector> panic_attributes = new Hashtable<String, StringVector>(); final StringVector notes = new StringVector(); notes.add(att_val_list); panic_attributes.put("note", notes); @@ -1123,60 +1008,52 @@ public class GFFStreamFeature extends SimpleDocumentFeature final String next_bit = rest_of_token.substring(1, quote_index); att_values.add(next_bit); rest_of_token = rest_of_token.substring(quote_index + 1).trim(); - } - else - { + } else { final int index_of_next_space = rest_of_token.indexOf(" "); - if(index_of_next_space == -1) - { + if (index_of_next_space == -1) { att_values.add(rest_of_token); rest_of_token = ""; - } - else - { - final String next_bit = - rest_of_token.substring(0, index_of_next_space); + } else { + final String next_bit = rest_of_token.substring(0, + index_of_next_space); att_values.add(next_bit); - rest_of_token = - rest_of_token.substring(index_of_next_space).trim(); + rest_of_token = rest_of_token.substring(index_of_next_space) + .trim(); } } } - if(!rest_of_token.equals("")) + if (!rest_of_token.equals("")) att_values.add(rest_of_token); } - if(att_name.equals("Dbxref") || att_name.equals("Alias")) // convert to multi-line + if (att_name.equals("Dbxref") || att_name.equals("Alias")) // convert to + // multi-line { - StringTokenizer stok = - new StringTokenizer((String)att_values.get(0), ","); + StringTokenizer stok 
= new StringTokenizer((String) att_values.get(0), + ","); StringVector str_values = new StringVector(); - while(stok.hasMoreTokens()) + while (stok.hasMoreTokens()) str_values.add(stok.nextToken()); att_values = str_values; } - if(att_name.equals("timelastmodified")) - { - try - { - this.timelastmodified = - new Timestamp( Long.parseLong((String)att_values.get(0)) ); - SimpleDateFormat date_format = - new SimpleDateFormat("dd.MM.yyyy hh:mm:ss z"); - att_values.set(0,date_format.format(timelastmodified)); - } - catch(NumberFormatException e) - { - att_values.set(0,(String)att_values.get(0)); + if (att_name.equals("timelastmodified")) { + try { + this.timelastmodified = new Timestamp( + Long.parseLong((String) att_values.get(0))); + SimpleDateFormat date_format = new SimpleDateFormat( + "dd.MM.yyyy hh:mm:ss z"); + att_values.set(0, date_format.format(timelastmodified)); + } catch (NumberFormatException e) { + att_values.set(0, (String) att_values.get(0)); } } - if(attr.get(att_name) != null) + if (attr.get(att_name) != null) attr.get(att_name).add(att_values); else attr.put(att_name, att_values); @@ -1187,32 +1064,30 @@ public class GFFStreamFeature extends SimpleDocumentFeature /** * Get the feature time last modified timestamp. + * * @return */ - public Timestamp getLastModified() - { + public Timestamp getLastModified() { return timelastmodified; } /** * Get the GFF_source value of a Dbxref qualifier. 
+ * * @param qualifier - * @return the gff_source value or NULL + * @return the gff_source value or NULL */ - private String getDbxrefGFFSource(final Qualifier qualifier) - { - StringVector qualifier_strings = - StreamQualifier.toStringVector(null, qualifier); + private String getDbxrefGFFSource(final Qualifier qualifier) { + StringVector qualifier_strings = StreamQualifier.toStringVector(null, + qualifier); - for(int i=0; i<qualifier_strings.size(); i++) - { - String qualifier_string = (String)qualifier_strings.elementAt(i); + for (int i = 0; i < qualifier_strings.size(); i++) { + String qualifier_string = (String) qualifier_strings.elementAt(i); - if(qualifier_string.indexOf("GFF_source:") >-1) - { - int index = qualifier_string.indexOf(":")+1; + if (qualifier_string.indexOf("GFF_source:") > -1) { + int index = qualifier_string.indexOf(":") + 1; int len = qualifier_string.length(); - if(qualifier_string.endsWith("\"")) + if (qualifier_string.endsWith("\"")) len--; return qualifier_string.substring(index, len); } @@ -1222,136 +1097,112 @@ public class GFFStreamFeature extends SimpleDocumentFeature /** * Set the feature time last modified timestamp. 
+ * @param timelastmodified */ - public void setLastModified(final Timestamp timelastmodified) - { + public void setLastModified(final Timestamp timelastmodified) { this.timelastmodified = timelastmodified; // now update the qualifier value itself QualifierVector qualifiers = getQualifiers(); Qualifier qualifier = qualifiers.getQualifierByName("timelastmodified"); - SimpleDateFormat date_format = - new SimpleDateFormat("dd.MM.yyyy hh:mm:ss z"); - - if(qualifier != null) - qualifier.removeValue((String)qualifier.getValues().get(0)); - else - { - try - { + SimpleDateFormat date_format = new SimpleDateFormat("dd.MM.yyyy hh:mm:ss z"); + + if (qualifier != null) + qualifier.removeValue((String) qualifier.getValues().get(0)); + else { + try { qualifier = new Qualifier("timelastmodified", - date_format.format(timelastmodified)); + date_format.format(timelastmodified)); setQualifier(qualifier); return; + } catch (EntryInformationException eie) { + } catch (ReadOnlyException roe) { } - catch(EntryInformationException eie) - {} - catch(ReadOnlyException roe) - {} } qualifier.addValue(date_format.format(timelastmodified)); } /** - * Returns true if and only if this Feature can't be changed or can't be - * removed from it's entry. + * Returns true if and only if this Feature can't be changed or can't be + * removed from its entry. 
**/ - public boolean isReadOnly () - { - if(readOnlyFeature) + public boolean isReadOnly() { + if (readOnlyFeature) return true; return super.isReadOnly(); } - public void setReadOnlyFeature(boolean readOnlyFeature) - { + public void setReadOnlyFeature(boolean readOnlyFeature) { this.readOnlyFeature = readOnlyFeature; } - public ChadoCanonicalGene getChadoGene() - { + public ChadoCanonicalGene getChadoGene() { return chadoGene; } - public void setChadoGene(ChadoCanonicalGene chadoGene) - { + public void setChadoGene(ChadoCanonicalGene chadoGene) { this.chadoGene = chadoGene; } - public boolean isVisible() - { + public boolean isVisible() { return visible; } - public void setVisible(boolean visible) - { + public void setVisible(boolean visible) { this.visible = visible; } - public String getGffSeqName() - { + public String getGffSeqName() { return gffSeqName; } - public void setGffSeqName(String gffSeqName) - { + public void setGffSeqName(String gffSeqName) { this.gffSeqName = gffSeqName; } - public String getGffSource() - { + public String getGffSource() { return gffSource; } - public void setGffSource(String gffSource) - { + public void setGffSource(String gffSource) { this.gffSource = gffSource; } - public boolean isLazyLoaded() - { + public boolean isLazyLoaded() { return lazyLoaded; } - public void setLazyLoaded(boolean lazyLoaded) - { + public void setLazyLoaded(boolean lazyLoaded) { this.lazyLoaded = lazyLoaded; } - public org.gmod.schema.sequence.Feature getChadoLazyFeature() - { + public org.gmod.schema.sequence.Feature getChadoLazyFeature() { return chadoLazyFeature; } public void setChadoLazyFeature( - org.gmod.schema.sequence.Feature chadoLazyFeature) - { + org.gmod.schema.sequence.Feature chadoLazyFeature) { this.chadoLazyFeature = chadoLazyFeature; } - protected static boolean isGTF(Feature feature) - { - if(!(feature instanceof GFFStreamFeature)) + protected static boolean isGTF(Feature feature) { + if (!(feature instanceof GFFStreamFeature)) return 
false; - final String names[] = { "ID", "Name", "Alias", "Parent", - "Derives_from", - "Target", "Gap", "Note", - "Dbxref", "Ontology_term" }; + final String names[] = { "ID", "Name", "Alias", "Parent", "Derives_from", + "Target", "Gap", "Note", "Dbxref", "Ontology_term" }; - for(String name: names) - { - if(feature.getQualifiers().getQualifierByName(name) != null) + for (String name : names) { + if (feature.getQualifiers().getQualifierByName(name) != null) return false; } - if(feature.getQualifiers().getQualifierByName("gene_id") != null && - feature.getQualifiers().getQualifierByName("transcript_id") != null) - { - if(feature.getEntry() != null) - logger4j.debug(feature.getEntry().getName()+" is in GTF format"); + if (feature.getQualifiers().getQualifierByName("gene_id") != null + && feature.getQualifiers().getQualifierByName("transcript_id") != null) { + if (feature.getEntry() != null) + logger4j.debug(feature.getEntry().getName() + " is in GTF format"); return true; } return false; diff --git a/uk/ac/sanger/artemis/util/DatabaseDocument.java b/uk/ac/sanger/artemis/util/DatabaseDocument.java index 6cba75df3449f18a6ac8c27112ac66ae89d5f06d..6addbd4fa2b71679539867aabd4e32301bc0f5a2 100644 --- a/uk/ac/sanger/artemis/util/DatabaseDocument.java +++ b/uk/ac/sanger/artemis/util/DatabaseDocument.java @@ -27,11 +27,11 @@ package uk.ac.sanger.artemis.util; import uk.ac.sanger.artemis.Options; import uk.ac.sanger.artemis.io.ChadoCanonicalGene; import uk.ac.sanger.artemis.io.DocumentEntry; +import uk.ac.sanger.artemis.io.GFF3Encoder; import uk.ac.sanger.artemis.io.GFFStreamFeature; import uk.ac.sanger.artemis.io.PartialSequence; import uk.ac.sanger.artemis.io.Range; import uk.ac.sanger.artemis.io.ReadFormatException; - import uk.ac.sanger.artemis.chado.ArtemisUtils; import uk.ac.sanger.artemis.chado.ChadoCvTermView; import uk.ac.sanger.artemis.chado.ChadoTransactionManager; @@ -1301,7 +1301,7 @@ public class DatabaseDocument extends Document // ortholog/paralog/cluster data 
int orthologueFeature = fr.getFeatureByObjectId().getFeatureId(); clusterOrthoParalog.append(cvTermName+"="+ - GFFStreamFeature.encode("object_id="+orthologueFeature+"; rank="+fr.getRank())+";"); + GFF3Encoder.encode("object_id="+orthologueFeature+"; rank="+fr.getRank())+";"); } } } @@ -1393,10 +1393,10 @@ public class DatabaseDocument extends Document if(qualifier_name == null) continue; if(featprop.getValue() != null) - this_buff.append(GFFStreamFeature.encode(qualifier_name)+ "=" + - GFFStreamFeature.encode(featprop.getValue())+";"); + this_buff.append(GFF3Encoder.encode(qualifier_name)+ "=" + + GFF3Encoder.encode(featprop.getValue())+";"); else - this_buff.append(GFFStreamFeature.encode(qualifier_name)+";"); + this_buff.append(GFF3Encoder.encode(qualifier_name)+";"); } } @@ -1408,7 +1408,7 @@ public class DatabaseDocument extends Document if(feat.getDbXRef() != null) { this_buff.append("Dbxref="); - this_buff.append(GFFStreamFeature.encode( + this_buff.append(GFF3Encoder.encode( feat.getDbXRef().getDb().getName()+":"+feat.getDbXRef().getAccession())); foundPrimaryDbXRef = true; if(dbxref == null || dbxref.size() == 0) @@ -1423,7 +1423,7 @@ public class DatabaseDocument extends Document this_buff.append("Dbxref="); for(int j=0; j<dbxref.size(); j++) { - this_buff.append(GFFStreamFeature.encode(dbxref.get(j))); + this_buff.append(GFF3Encoder.encode(dbxref.get(j))); if(j<dbxref.size()-1) this_buff.append(","); } @@ -1442,7 +1442,7 @@ public class DatabaseDocument extends Document this_buff.append(alias.getSynonym().getName()); if(!alias.isCurrent()) - this_buff.append(GFFStreamFeature.encode(";current=false")); + this_buff.append(GFF3Encoder.encode(";current=false")); //if(j<v_synonyms.size()-1) this_buff.append(";"); @@ -1511,9 +1511,9 @@ public class DatabaseDocument extends Document attr_buff.append("controlled_curation="); attr_buff.append("term="+ - GFFStreamFeature.encode(feature_cvterm.getCvTerm().getName())+"%3B"); + 
GFF3Encoder.encode(feature_cvterm.getCvTerm().getName())+"%3B"); attr_buff.append("cv="+ - GFFStreamFeature.encode(feature_cvterm.getCvTerm().getCv().getName())+"%3B"); + GFF3Encoder.encode(feature_cvterm.getCvTerm().getCv().getName())+"%3B"); // N.B. the db_xref may be a FeatureCvTermDbXRef or a Pub for /controlled_curation int nfound_dbxref = 0; @@ -1584,7 +1584,7 @@ public class DatabaseDocument extends Document attr_buff.append(getCvtermName(feature_cvtermprop.getCvTerm() .getCvTermId(), dao, gene_builder)); attr_buff.append("="); - attr_buff.append(GFFStreamFeature.encode(feature_cvtermprop.getValue())); + attr_buff.append(GFF3Encoder.encode(feature_cvtermprop.getValue())); if(i < feature_cvtermprops.size()-1) attr_buff.append("%3B"); } @@ -1657,7 +1657,7 @@ public class DatabaseDocument extends Document + dbXRef.getAccession() + "%3B"); attr_buff.append("term="+ - GFFStreamFeature.encode(feature_cvterm.getCvTerm().getName())+"%3B"); + GFF3Encoder.encode(feature_cvterm.getCvTerm().getName())+"%3B"); // PMID int nfound_pub = 0; @@ -1731,7 +1731,7 @@ public class DatabaseDocument extends Document attr_buff.append(getCvtermName(feature_cvtermprop.getCvTerm() .getCvTermId(), dao, gene_builder)); attr_buff.append("="); - attr_buff.append(GFFStreamFeature.encode(feature_cvtermprop.getValue())); + attr_buff.append(GFF3Encoder.encode(feature_cvtermprop.getValue())); if(i < feature_cvtermprops.size()-1) attr_buff.append("%3B"); }