diff --git a/uk/ac/sanger/artemis/io/GFFDocumentEntry.java b/uk/ac/sanger/artemis/io/GFFDocumentEntry.java
index 16dd6ffa8d3ab65f83bce9140e4c108db2e8fb21..d5a70fd783b61e050ba0a04aeb422c030ff71488 100644
--- a/uk/ac/sanger/artemis/io/GFFDocumentEntry.java
+++ b/uk/ac/sanger/artemis/io/GFFDocumentEntry.java
@@ -72,7 +72,18 @@ public class GFFDocumentEntry extends SimpleDocumentEntry
     super(new GFFEntryInformation(), document, listener);
     super.in_constructor = true;
     // join the separate exons into one feature (if appropriate)
-    combineGeneFeatures();
+    final FeatureVector original_features = getAllFeatures();
+    if(original_features.size() > 0 && ((GFFStreamFeature)original_features.get(0)).isGTF())
+    {
+      // GTF
+      mergeGtfFeatures(original_features, "CDS");
+      mergeGtfFeatures(original_features, "exon");
+    }
+    else
+    {
+      // GFF
+      combineGeneFeatures(original_features);
+    }
     super.in_constructor = false;
     finished_constructor = true;
   }
@@ -163,10 +174,8 @@ public class GFFDocumentEntry extends SimpleDocumentEntry
     return new FastaStreamSequence(sequence);
   }
 
-  private void combineGeneFeatures()
+  private void combineGeneFeatures(FeatureVector original_features)
   {
-    final FeatureVector original_features = getAllFeatures();
-
     Feature this_feature;
     Hashtable chado_gene = new Hashtable();
     try
@@ -817,5 +826,87 @@ public class GFFDocumentEntry extends SimpleDocumentEntry
     }
     return merge_qualifier_vector;
   }
+
+  /**
+   * Merge the GTF features with the given key into one multi-range feature
+   * per transcript. Features are grouped on the first value of their
+   * transcript_id qualifier (single quotes removed). Every member of a
+   * group is passed to removeInternal() and the group is replaced, through
+   * forcedAdd(), by a single feature that carries the merged qualifiers and
+   * one range per member. If the merged qualifiers include gene_id, an ID
+   * qualifier of the form keyStr:gene_id is added as well. Features of this
+   * key without a transcript_id value are skipped and stay as they are.
+   * @param original_features the features to scan
+   * @param keyStr the key of the features to merge, e.g. "CDS" or "exon"
+   * @throws ReadOnlyException declared for the removeInternal()/forcedAdd()
+   *         calls (NOTE(review): confirm which of them can raise it)
+   */
+  private void mergeGtfFeatures(FeatureVector original_features, String keyStr) throws ReadOnlyException
+  {
+    final Hashtable<String, Vector<GFFStreamFeature>> group =
+        new Hashtable<String, Vector<GFFStreamFeature>>();
+    for(int i=0; i<original_features.size(); i++)
+    {
+      final GFFStreamFeature feature = (GFFStreamFeature)original_features.get(i);
+      if(!feature.getKey().getKeyString().equals(keyStr))
+        continue;
+
+      // isGTF() is only asked about the first feature of the entry, so a
+      // later feature may lack transcript_id; skip it instead of failing
+      // with a NullPointerException
+      final Qualifier transcript_qualifier = feature.getQualifierByName("transcript_id");
+      if(transcript_qualifier == null ||
+         transcript_qualifier.getValues() == null ||
+         transcript_qualifier.getValues().size() == 0)
+        continue;
+
+      final String transcriptId =
+          ((String) transcript_qualifier.getValues().get(0)).replaceAll("'", "");
+      Vector<GFFStreamFeature> this_group = group.get(transcriptId);
+      if(this_group == null)
+      {
+        this_group = new Vector<GFFStreamFeature>();
+        group.put(transcriptId, this_group);
+      }
+      this_group.add(feature);
+    }
+
+    final Enumeration<String> enumGroup = group.keys();
+    while(enumGroup.hasMoreElements())
+    {
+      final String transcriptId = enumGroup.nextElement();
+      final Vector<GFFStreamFeature> this_group = group.get(transcriptId);
+      QualifierVector qualifier_vector = new QualifierVector();
+      final RangeVector new_range_vector = new RangeVector();
+
+      for(GFFStreamFeature this_feature: this_group)
+      {
+        // the merged feature added below replaces the separate ones
+        removeInternal(this_feature);
+        qualifier_vector.addAll(this_feature.getQualifiers());
+
+        // complement ranges are inserted at the front, reversing input order
+        final Range new_range = (Range) this_feature.getLocation().getRanges().elementAt(0);
+        if(this_feature.getLocation().isComplement())
+          new_range_vector.insertElementAt(this_feature.getLocation().getTotalRange(), 0);
+        else
+          new_range_vector.add(new_range);
+      }
+      final GFFStreamFeature old_feature = (GFFStreamFeature)this_group.get(0);
+
+      final Location new_location = new Location(new_range_vector,
+          old_feature.getLocation().isComplement());
+
+      qualifier_vector = mergeQualifiers(qualifier_vector, new_location.isComplement());
+      // NOTE(review): transcripts that share a gene_id receive the same ID value
+      if(qualifier_vector.getQualifierByName("gene_id") != null)
+        qualifier_vector.addQualifierValues(new Qualifier("ID",
+            keyStr+":"+qualifier_vector.getQualifierByName("gene_id").getValues().get(0)));
+
+      final GFFStreamFeature new_feature = new GFFStreamFeature(old_feature
+          .getKey(), new_location, qualifier_vector);
+      forcedAdd(new_feature);
+    }
+  }
 
 }
\ No newline at end of file
diff --git a/uk/ac/sanger/artemis/io/GFFStreamFeature.java b/uk/ac/sanger/artemis/io/GFFStreamFeature.java
index 6f469f1a144873071adae17773ad5cada403a1ee..70a7f9a8cb64317a305baa7195a227cc7581a102 100644
--- a/uk/ac/sanger/artemis/io/GFFStreamFeature.java
+++ b/uk/ac/sanger/artemis/io/GFFStreamFeature.java
@@ -1348,6 +1348,37 @@ public class GFFStreamFeature extends SimpleDocumentFeature
     this.chadoLazyFeature = chadoLazyFeature;
   }
 
+  /**
+   * Test whether this feature looks like a GTF record rather than GFF3.
+   * A feature carrying any of the attribute tags predefined by the GFF3
+   * specification is not GTF; otherwise it is GTF only when both the
+   * gene_id and transcript_id qualifiers are present.
+   * @return true if the feature is taken to be in GTF format
+   */
+  protected boolean isGTF()
+  {
+    final String[] names = { "ID", "Name", "Alias", "Parent",
+                             "Derives_from",
+                             "Target", "Gap", "Note",
+                             "Dbxref", "Ontology_term" };
+    // look the qualifiers up once rather than on every test below
+    final QualifierVector qualifiers = getQualifiers();
+
+    for(String name: names)
+    {
+      if(qualifiers.getQualifierByName(name) != null)
+        return false;
+    }
+
+    if(qualifiers.getQualifierByName("gene_id") != null &&
+       qualifiers.getQualifierByName("transcript_id") != null)
+    {
+      logger4j.debug(getEntry().getName()+" is in GTF format");
+      return true;
+    }
+    return false;
+  }
+
   public static void main(String args[])
   {
     Key key = new Key("region");