Newer
Older
/* GFFDocumentEntry.java
*
* created: Tue Sep 14 1999
*
* This file is part of Artemis
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/GFFDocumentEntry.java,v 1.69 2009-09-03 13:47:31 tjc Exp $
import uk.ac.sanger.artemis.chado.FeatureLocLazyQualifierValue;
import uk.ac.sanger.artemis.components.filetree.LocalAndRemoteFileManager;
import uk.ac.sanger.artemis.components.genebuilder.GeneUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
tjc
committed
import org.gmod.schema.sequence.FeatureLoc;
/**
* A DocumentEntry that can read a GFF entry from a Document.
*
* @author Kim Rutherford
* @version $Id: GFFDocumentEntry.java,v 1.69 2009-09-03 13:47:31 tjc Exp $
/**
* Create a new GFFDocumentEntry object associated with the given
* Document.  After the superclass constructor has run, the features are
* post-processed: GTF input has its "CDS" and "exon" features merged,
* any other input has its gene features combined.
* @param document This is the file that we will read from. This is also
* used for saving the entry back to the file it came from and to give
* the new object a name.
* @param listener The object that will listen for ReadEvents.
* @exception IOException thrown if there is a problem reading the entry -
* most likely ReadFormatException.
* @exception EntryInformationException declared by this constructor; the
* visible body does not show which call raises it.
**/
GFFDocumentEntry(final Document document, final ReadListener listener)
throws IOException, EntryInformationException
{
// the superclass constructor receives a fresh GFFEntryInformation together
// with the document and the listener
super(new GFFEntryInformation(), document, listener);
// NOTE(review): the bare "tjc" / "committed" lines in this body are not Java;
// they look like residue from a web blame view - confirm against the real file
tjc
committed
// in_constructor is held true only for the duration of the merging below
super.in_constructor = true;
// NOTE(review): original_features is not declared in the visible lines -
// confirm where it is obtained from
// the first feature alone decides whether the input is treated as GTF
if(original_features.size() > 0 && GFFStreamFeature.isGTF((Feature)original_features.get(0)))
{
// GTF
mergeGtfFeatures(original_features, "CDS");
mergeGtfFeatures(original_features, "exon");
}
else
{
// GFF
combineGeneFeatures(original_features);
}
tjc
committed
super.in_constructor = false;
finished_constructor = true;
}
/**
* Create a new GFFDocumentEntry that will be a copy of the given Entry and
* has no Document associated with it. The new GFFDocumentEntry cannot be
* saved to a file with save() unless save(Document) has been called
* first. Some qualifier and location information will be lost.
* @param new_entry The Entry to copy; it is passed to the superclass
* constructor together with a fresh GFFEntryInformation.
* @param force If true then invalid qualifiers and any features with
* invalid keys in the new Entry will be quietly thrown away. "Invalid"
* means that the key/qualifier is not allowed to occur in an Entry of
* this type (probably determined by the EntryInformation object of this
* Entry). If false an EntryInformationException will be thrown for
* invalid keys or qualifiers.
* @exception EntryInformationException see the description of force.
**/
public GFFDocumentEntry(final Entry new_entry, final boolean force)
throws EntryInformationException
{
super(new GFFEntryInformation(), new_entry, force);
// no feature merging is done here, unlike the Document based constructor
finished_constructor = true;
}
/**
* Create a new empty GFFDocumentEntry object that has no Document
* associated with it. The new GFFDocumentEntry cannot be saved to a
* file with save() unless save(Document) has been called first. The
* save(Document) method will assign a Document.
public GFFDocumentEntry(final EntryInformation entry_information)
{
super(new GFFEntryInformation());
/**
* Returns true if and only if this entry is read only. The value is the
* isReadOnly flag of this GFFDocumentEntry, which setReadOnly() can change.
**/
/**
* Returns true if and only if this entry is read only. The value is the
* isReadOnly flag of this GFFDocumentEntry, which setReadOnly() can change.
**/
return isReadOnly;
}
/**
* Set the read-only flag of this entry.
* @param isReadOnly the new value of the flag; it is stored unchanged in
* the isReadOnly field.
**/
public void setReadOnly(final boolean isReadOnly)
{
this.isReadOnly = isReadOnly;
}
/**
* If the given feature can be added directly to this Entry, then return
* it, otherwise create and return a new feature of the appropriate type.
* @param copy if true then always make a new copy of the Feature.
**/
protected Object makeNativeFeature(final Feature feature,
final boolean copy)
{
if(!copy && feature instanceof GFFStreamFeature)
return (GFFStreamFeature)feature;
else
{
if(PublicDBDocumentEntry.IGNORE_OBSOLETE_FEATURES)
{
Qualifier isObsoleteQualifier =
feature.getQualifiers().getQualifierByName("isObsolete");
if(isObsoleteQualifier != null)
{
String value = (String)isObsoleteQualifier.getValues().get(0);
if(Boolean.parseBoolean(value))
return null;
}
}
}
/**
* If the given Sequence can be added directly to this Entry, then return a
* copy of it, otherwise create and return a new feature of the appropriate
* type for this Entry.
**/
protected StreamSequence makeNativeSequence(final Sequence sequence)
{
return new FastaStreamSequence(sequence);
private void combineGeneFeatures(FeatureVector original_features)
{
Feature this_feature;
Hashtable chado_gene = new Hashtable();
try
{
// find the genes
for(int i = 0 ; i < original_features.size() ; ++i)
{
this_feature = original_features.featureAt(i);
final String key = this_feature.getKey().getKeyString();
if(this_feature instanceof GFFStreamFeature &&
(GeneUtils.isHiddenFeature(key) ||
GeneUtils.isObsolete((GFFStreamFeature)this_feature)))
((GFFStreamFeature)this_feature).setVisible(false);
if(key.equals("gene") || key.equals("pseudogene"))
final Qualifier idQualifier = this_feature.getQualifierByName("ID");
if(idQualifier != null)
{
String id = (String)this_feature.getQualifierByName("ID").getValues().get(0);
ChadoCanonicalGene gene = new ChadoCanonicalGene();
gene.setGene(this_feature);
chado_gene.put(id, gene);
((GFFStreamFeature)this_feature).setChadoGene(gene);
}
for(int i = 0 ; i < original_features.size() ; ++i)
{
this_feature = original_features.featureAt(i);
// transcript
Qualifier parent_qualifier = this_feature.getQualifierByName("Parent");
if(parent_qualifier == null)
continue;
StringVector parents = parent_qualifier.getValues();
for(int j=0; j<parents.size(); j++)
{
String parent = (String)parents.get(j);
if(chado_gene.containsKey(parent))
{
// store transcript
ChadoCanonicalGene gene = (ChadoCanonicalGene)chado_gene.get(parent);
gene.addTranscript(this_feature);
((GFFStreamFeature)this_feature).setChadoGene(gene);
// store the transcript ID with its ChadoCanonicalGene object
if(this_feature.getQualifierByName("ID") != null)
transcripts_lookup.put((String)this_feature.getQualifierByName("ID").getValues().get(0),
for(int i = 0 ; i < original_features.size() ; ++i)
{
this_feature = original_features.featureAt(i);
// exons
final Qualifier parent_qualifier = this_feature.getQualifierByName("Parent");
final Qualifier derives_qualifier = this_feature.getQualifierByName("Derives_from");
if(parent_qualifier == null && derives_qualifier == null)
this_feature.getQualifierByName("feature_relationship_rank");
// compare this feature's parent IDs to the transcript IDs in the
// chado gene hash to decide if it is part of it
final StringVector parent_id;
if(parent_qualifier != null)
parent_id = parent_qualifier.getValues();
else
parent_id = derives_qualifier.getValues();
final String parent = (String)parent_id.get(j);
final ChadoCanonicalGene gene = (ChadoCanonicalGene)transcripts_lookup.get(parent);
((GFFStreamFeature)this_feature).setChadoGene(gene);
//((GFFStreamFeature)this_feature).setVisible(false);
else if(key.equals("three_prime_UTR"))
gene.add3PrimeUtr(parent, this_feature);
else if(key.equals("five_prime_UTR"))
gene.add5PrimeUtr(parent, this_feature);
else if(key.equals(DatabaseDocument.EXONMODEL) || key.equals("exon") ||
featureRelationship != null ||
else
gene.addOtherFeatures(parent, this_feature);
if(getDocument() instanceof DatabaseDocument)
{
DatabaseDocument doc = (DatabaseDocument)getDocument();
{
// using lazy loading - add the lazy chado feature to GFFStreamFeature
final Hashtable idFeatureStore = doc.getIdFeatureStore();
for(int i = 0 ; i < original_features.size() ; ++i)
{
this_feature = original_features.featureAt(i);
String featureId = (String) this_feature.getQualifierByName("feature_id").getValues().get(0);
org.gmod.schema.sequence.Feature chadoLazyFeature =
(org.gmod.schema.sequence.Feature)idFeatureStore.get(featureId);
((GFFStreamFeature)this_feature).setChadoLazyFeature(chadoLazyFeature);
}
idFeatureStore.clear();
}
}
Enumeration enum_genes = chado_gene.elements();
while(enum_genes.hasMoreElements())
{
ChadoCanonicalGene gene = (ChadoCanonicalGene)enum_genes.nextElement();
combineChadoExons(gene);
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
// inferring CDS and UTRs
if(DatabaseDocument.CHADO_INFER_CDS)
{
final Vector transcripts = (Vector)gene.getTranscripts();
gene.correctSpliceSiteAssignments();
for(int i=0; i<transcripts.size(); i++)
{
GFFStreamFeature transcript = (GFFStreamFeature)transcripts.get(i);
String transcript_id = null;
transcript_id = GeneUtils.getUniqueName(transcript);
List exons = gene.getSpliceSitesOfTranscript(transcript_id, "exon");
if(exons == null)
continue;
Iterator it = exons.iterator();
while(it.hasNext())
{
final GFFStreamFeature exonFeature = (GFFStreamFeature)it.next();
QualifierVector qualifiers = new QualifierVector();
qualifiers.add(new Qualifier("ID", transcript_id+":CDS"));
qualifiers.add(new Qualifier("Parent", transcript_id));
DatabaseInferredFeature cdsFeature = new DatabaseInferredFeature(
Key.CDS, exonFeature.getLocation(), qualifiers, gene);
try
{
gene.addSplicedFeatures(transcript_id, cdsFeature);
forcedAdd(cdsFeature);
}
catch (ReadOnlyException e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
tjc
committed
}
}
catch(InvalidRelationException e)
{
e.printStackTrace();
}
}
tjc
committed
* Get 'similarity', polypeptide_domain qualifiers
* @param fv
* @throws InvalidRelationException
*/
tjc
committed
private void loadFeatureLocLazyData(final FeatureVector fv)
final DatabaseDocument doc = (DatabaseDocument)getDocument();
List matches;
if(fv.size() < 30 && fv.size() > 0) // if just a few features to look up e.g. for gene editor
{
List featureIds = new Vector(fv.size());
for(int i=0;i<fv.size(); i++)
{
Qualifier featureIdQualifier = fv.featureAt(i).getQualifierByName("feature_id");
featureIds.add( (String)featureIdQualifier.getValues().get(0) );
}
matches = doc.getSimilarityMatches(featureIds);
}
else
matches = doc.getSimilarityMatches(null);
if(matches == null || matches.size() < 1)
return;
tjc
committed
final Hashtable temp_lookup_hash = new Hashtable(matches.size()/2);
String f_id;
for(int i=0; i<fv.size(); i++)
{
Feature f = (Feature)fv.elementAt(i);
Qualifier qualifier = ((Feature)f).getQualifierByName("feature_id");
if(qualifier != null)
{
f_id = (String)qualifier.getValues().get(0);
temp_lookup_hash.put(f_id, f);
}
}
tjc
committed
final Hashtable hashFeatureLocs = getFeatureLocsHash(doc, matches);
if(hashFeatureLocs == null)
return;
final Hashtable cvTermCache = new Hashtable();
for(int i=0; i<matches.size(); i++)
{
org.gmod.schema.sequence.Feature matchFeature =
(org.gmod.schema.sequence.Feature)matches.get(i);
final String qualifierName;
if(cvTerm.getName().indexOf("match") > -1)
qualifierName = "similarity";
else
qualifierName = cvTerm.getName();
tjc
committed
final List featureLocs =
(List) hashFeatureLocs.get(new Integer(matchFeature.getFeatureId()));
if(featureLocs == null)
continue;
matchFeature.setFeatureLocsForFeatureId(featureLocs);
//java.util.Collection featureLocs = matchFeature.getFeatureLocsForFeatureId();
java.util.Iterator it = featureLocs.iterator();
while(it.hasNext())
{
(Feature)temp_lookup_hash.get(Integer.toString(featureLoc.getSrcFeatureId()));
if(queryFeature != null)
Qualifier qualifier = queryFeature.getQualifierByName(qualifierName);
final FeatureLocLazyQualifierValue sim =
new FeatureLocLazyQualifierValue(matchFeature, featureLoc.getSrcFeatureId());
if(qualifier == null || !(qualifier instanceof QualifierLazyLoading))
qualifier = new QualifierLazyLoading(qualifierName, sim);
else
((QualifierLazyLoading)qualifier).addValue(sim);
try
{
queryFeature.setQualifier(qualifier);
}
catch(ReadOnlyException e)
{
e.printStackTrace();
}
catch(EntryInformationException e)
{
e.printStackTrace();
}
if(qualifierName.equals("polypeptide_domain") &&
LocalAndRemoteFileManager.domainLoad.isSelected())
addDomain(queryFeature, featureLoc, matchFeature);
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
/**
 * Look up the CvTerm of a match feature, going through a local cache first.
 * The cache is keyed by cvterm_id (as an Integer) and holds the matching
 * CvTerm objects. On a cache miss the term is fetched with
 * DatabaseDocument.getCvTermByCvTermId() and stored in the cache. In both
 * cases the resulting term is also set on the match feature.
 * @param cvTermCache   cache of cvterm_id to CvTerm; updated on a miss
 * @param matchFeature  feature whose CvTerm supplies the cvterm_id and
 *                      which receives the resolved term via setCvTerm()
 * @param f             passed through to
 *                      DatabaseDocument.getCvTermByCvTermId()
 * @return the CvTerm that was found in, or added to, the cache
 */
private CvTerm getCvTermFromCache(final Hashtable cvTermCache,
                                  final org.gmod.schema.sequence.Feature matchFeature,
                                  final Feature f)
{
  final Integer cacheKey = Integer.valueOf(matchFeature.getCvTerm().getCvTermId());

  // a Hashtable cannot hold null values, so null here means "not cached yet"
  CvTerm resolved = (CvTerm) cvTermCache.get(cacheKey);
  if(resolved == null)
  {
    resolved = DatabaseDocument.getCvTermByCvTermId(
        matchFeature.getCvTerm().getCvTermId(), f);
    cvTermCache.put(cacheKey, resolved);
  }

  matchFeature.setCvTerm(resolved);
  return resolved;
}
/**
* Add domain features as read-only features.  A new GFFStreamFeature with
* the key "polypeptide_domain" is built and added to this entry.
* @param queryFeature the feature the domain is placed on; it is cast to
* GFFStreamFeature to obtain its ChadoCanonicalGene.
* @param featureLoc supplies the Fmin/Fmax values the location is built from.
* @param matchFeature stored on the new feature with setChadoLazyFeature().
*/
private void addDomain(final Feature queryFeature,
final FeatureLoc featureLoc,
org.gmod.schema.sequence.Feature matchFeature)
{
try
{
int start = queryFeature.getLocation().getFirstBase();
ChadoCanonicalGene chadoGene = ((GFFStreamFeature)queryFeature).getChadoGene();
// NOTE(review): "location" is assigned below but not declared in the
// visible lines - confirm against the real file
// when a chado gene is attached it is asked for the nucleotide location
if(chadoGene != null)
location = chadoGene.getNucLocation(queryFeature, featureLoc);
else if(queryFeature.getLocation().isComplement())
// Fmin/Fmax are scaled by 3 and offset from the first base of the query
// feature - presumably amino acid to nucleotide coordinates; TODO confirm
location = new Location("complement("+
(start+(featureLoc.getFmin()*3)+1)+".."+(start+(featureLoc.getFmax()*3))+")");
// NOTE(review): as shown this assignment is unconditional and overwrites
// both branches above; an "else" line may have been lost - confirm
location = new Location(
(start+(featureLoc.getFmin()*3)+1)+".."+(start+(featureLoc.getFmax()*3)));
// the new feature is created without qualifiers (null)
final GFFStreamFeature newFeature = new GFFStreamFeature(
new Key("polypeptide_domain"),
location, null);
newFeature.setReadOnlyFeature(true);
newFeature.setChadoLazyFeature(matchFeature);
add(newFeature);
}
catch (Exception e)
{
// any failure is only printed; in that case no domain feature is added
e.printStackTrace();
}
}
tjc
committed
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/**
 * Fetch the FeatureLocs of all match features with one bulk query and index
 * them by feature_id.
 * @param doc      document used for the bulk query
 *                 (getFeatureLocsByListOfIds)
 * @param matches  list of org.gmod.schema.sequence.Feature objects whose
 *                 feature_ids are looked up
 * @return a Hashtable mapping each feature_id (Integer) to the List of
 *         FeatureLocs belonging to it; null if the query returns null
 */
private Hashtable getFeatureLocsHash(final DatabaseDocument doc, final List matches)
{
  // the query takes the feature_ids as Strings
  final List idsToFetch = new Vector(matches.size());
  for(Object item : matches)
  {
    final org.gmod.schema.sequence.Feature match =
        (org.gmod.schema.sequence.Feature) item;
    idsToFetch.add(Integer.toString(match.getFeatureId()));
  }

  final List fetchedLocs = doc.getFeatureLocsByListOfIds(idsToFetch);
  if(fetchedLocs == null)
    return null;

  // bucket every FeatureLoc under the feature_id of the feature it belongs to
  final Hashtable locsByFeatureId = new Hashtable();
  for(Object item : fetchedLocs)
  {
    final FeatureLoc loc = (FeatureLoc) item;
    final Integer ownerId =
        Integer.valueOf(loc.getFeatureByFeatureId().getFeatureId());

    List bucket = (List) locsByFeatureId.get(ownerId);
    if(bucket == null)
    {
      bucket = new Vector();
      locsByFeatureId.put(ownerId, bucket);
    }
    bucket.add(loc);
  }
  return locsByFeatureId;
}
/**
* Combine the features (which are exons) and delete the originals from this
* Entry. The key of this hash will be the group name and the value is a
* FeatureVector containing the features that are in that group. Groups
* that have more than one member will be combined.
**/
private void combineChadoExons(ChadoCanonicalGene gene)
final List<Feature> transcripts = gene.getTranscripts();
GFFStreamFeature transcript = (GFFStreamFeature)transcripts.get(i);
if(transcript.getQualifierByName("ID") == null)
continue;
transcript_id = (String)(transcript.getQualifierByName("ID").getValues().get(0));
Set<String> splicedSiteTypes = gene.getSpliceTypes(transcript_id);
Iterator<String> it = splicedSiteTypes.iterator();
Vector<Feature> new_set = new Vector<Feature>();
String type = it.next();
List<Feature> splicedSites = gene.getSpliceSitesOfTranscript(transcript_id, type);
(String)(transcript.getQualifierByName("ID").getValues().get(0)),
transcript.getLocation().isComplement());
private void mergeFeatures(final List<Feature> gffFeatures,
final List<Feature> new_set,
final String transcript_id,
final boolean isComplement)
final Hashtable<String, Integer> feature_relationship_rank_store = new Hashtable<String, Integer>();
final Hashtable<String, Range> id_range_store = new Hashtable<String, Range>();
final RangeVector new_range_vector = new RangeVector();
final Qualifier codon_start = getCodonStart(gffFeatures, isComplement);
for(int j = 0; j < gffFeatures.size(); j++)
{
final GFFStreamFeature this_feature = (GFFStreamFeature)gffFeatures.get(j);
Integer rank;
Qualifier rankQualifier = this_feature
.getQualifierByName("feature_relationship_rank");
if(rankQualifier == null)
rank = new Integer(0);
else
{
rank = new Integer((String) (rankQualifier.getValues().get(0)));
this_feature.getQualifiers().removeQualifierByName("feature_relationship_rank");
}
// use the most current lastmodified datestamp
if(this_feature.getLastModified() != null
&& (lasttimemodified == null || this_feature.getLastModified()
.compareTo(lasttimemodified) > 0))
lasttimemodified = this_feature.getLastModified();
final Location this_feature_location = this_feature.getLocation();
if(this_feature_location.getRanges().size() > 1)
{
String id= "";
try
{
id = (String)this_feature.getQualifierByName("ID").getValues().get(0);
}
catch(Exception e){}
throw new Error("internal error - new location should have "
+ "exactly one range (there may be non-unique ID's):\n"+
this_feature_location.toStringShort()+"\n"+id);
}
final Range new_range = (Range) this_feature_location.getRanges()
.elementAt(0);
Qualifier id_qualifier = this_feature.getQualifierByName("ID");
if(id_qualifier != null)
{
String id = (String) (id_qualifier.getValues()).elementAt(0);
id_range_store.put(id, new_range);
feature_relationship_rank_store.put(id, rank);
}
else
Splash.logger4j.warn("NO ID FOUND FOR FEATURE AT: "+
this_feature.getLocation().toString());
if(this_feature_location.isComplement())
new_range_vector.insertElementAt(new_range, 0);
else
new_range_vector.add(new_range);
removeInternal(this_feature);
final GFFStreamFeature first_old_feature = (GFFStreamFeature)gffFeatures.get(0);
final Location new_location = new Location(new_range_vector,
first_old_feature.getLocation().isComplement());
if(codon_start != null)
{
QualifierVector tmp_qualifier_vector = new QualifierVector();
for(Qualifier q: qualifiers)
if(!q.getName().equals("codon_start"))
tmp_qualifier_vector.addElement(q);
qualifiers = tmp_qualifier_vector;
qualifiers.setQualifier(codon_start);
}
qualifiers = mergeQualifiers(qualifiers);
final GFFStreamFeature new_feature = new GFFStreamFeature(first_old_feature
if(lasttimemodified != null)
new_feature.setLastModified(lasttimemodified);
if(first_old_feature.getChadoGene() != null)
new_feature.setChadoGene(first_old_feature.getChadoGene());
new_feature.setSegmentRangeStore(id_range_store);
new_feature
.setFeature_relationship_rank_store(feature_relationship_rank_store);
new_feature.setGffSource(first_old_feature.getGffSource());
new_feature.setGffSeqName(first_old_feature.getGffSeqName());
// set the ID
String ID;
try
{
ID = new_feature.getSegmentID(new_feature.getLocation().getRanges());
}
catch(NullPointerException npe)
{
if(new_feature.getQualifierByName("Parent") != null)
ID = ((String)new_feature.getQualifierByName("Parent").getValues().get(0)) +
":"+new_feature.getKey().getKeyString();
else
ID = new_feature.getKey().getKeyString();
}
final Qualifier id_qualifier = new_feature.getQualifierByName("ID");
id_qualifier.removeValue((String)(id_qualifier.getValues()).elementAt(0));
// set visibility
if(GeneUtils.isHiddenFeature(new_feature.getKey().getKeyString()) ||
GeneUtils.isObsolete(new_feature))
new_feature.setVisible(false);
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
try
{
new_feature.setLocation(new_location);
final Qualifier gene_qualifier = new_feature.getQualifierByName("gene");
if(gene_qualifier != null
&& gene_qualifier.getValues().size() > 0
&& ((String) (gene_qualifier.getValues()).elementAt(0))
.startsWith("Phat"))
{
// special case to handle incorrect output of the Phat gene
// prediction tool
new_feature.removeQualifierByName("codon_start");
}
forcedAdd(new_feature);
//gene.addExon(transcript_id, new_feature, true );
new_set.add(new_feature);
}
catch(ReadOnlyException e)
{
throw new Error("internal error - unexpected exception: " + e);
}
catch(OutOfRangeException e)
{
throw new Error("internal error - unexpected exception: " + e);
}
catch(EntryInformationException e)
{
throw new Error("internal error - unexpected exception: " + e);
private QualifierVector mergeQualifiers(QualifierVector qualifier_vector)
{
QualifierVector merge_qualifier_vector = new QualifierVector();
for(int i = 0 ; i < qualifier_vector.size() ; ++i)
{
Qualifier qual = (Qualifier)qualifier_vector.elementAt(i);
{
merge_qualifier_vector.addElement(qual);
seen = true;
}
}
final Qualifier id_qualifier =
merge_qualifier_vector.getQualifierByName("Alias");
if(id_qualifier == null)
merge_qualifier_vector.addElement(qual);
else
{
String id1 = (String)(id_qualifier.getValues()).elementAt(0);
String id2 = (String)(qual.getValues()).elementAt(0);
id_qualifier.removeValue(id1);
id_qualifier.addValue(id1+","+id2);
}
}
else if(!qual.getName().equals("ID") &&
!qual.getName().equals("feature_id"))
merge_qualifier_vector.setQualifier(qual);
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
/**
 * Merge GTF features that share a transcript_id into one feature each.
 * Only features whose key equals keyStr take part. They are grouped by the
 * first value of their "transcript_id" qualifier (with any ' characters
 * removed); the members of each group are removed from this entry and
 * replaced by one new GFFStreamFeature carrying the merged qualifiers and
 * the collected ranges. If the merged qualifiers contain "gene_id", an "ID"
 * of the form keyStr:gene_id is added.
 * @param original_features  the features to scan; every element is cast to
 *                           GFFStreamFeature
 * @param keyStr             the feature key to merge, e.g. "CDS" or "exon"
 * @throws ReadOnlyException
 */
private void mergeGtfFeatures(FeatureVector original_features, String keyStr) throws ReadOnlyException
{
  // pass 1: bucket the matching features by transcript id
  final Hashtable<String, Vector<GFFStreamFeature>> byTranscript =
      new Hashtable<String, Vector<GFFStreamFeature>>();
  for(int idx = 0; idx < original_features.size(); idx++)
  {
    final GFFStreamFeature candidate = (GFFStreamFeature) original_features.get(idx);
    if(candidate.getKey().getKeyString().equals(keyStr))
    {
      final String rawId =
          (String) candidate.getQualifierByName("transcript_id").getValues().get(0);
      final String transcriptId = rawId.replaceAll("'", "");

      Vector<GFFStreamFeature> members = byTranscript.get(transcriptId);
      if(members == null)
      {
        members = new Vector<GFFStreamFeature>();
        byTranscript.put(transcriptId, members);
      }
      members.add(candidate);
    }
  }

  // pass 2: replace every bucket by a single merged feature
  for(final Enumeration<String> ids = byTranscript.keys(); ids.hasMoreElements(); )
  {
    final Vector<GFFStreamFeature> members = byTranscript.get(ids.nextElement());
    QualifierVector mergedQualifiers = new QualifierVector();
    final RangeVector mergedRanges = new RangeVector();

    for(int k = 0; k < members.size(); k++)
    {
      final GFFStreamFeature segment = members.get(k);
      removeInternal(segment);
      mergedQualifiers.addAll(segment.getQualifiers());

      final Range firstRange = (Range) segment.getLocation().getRanges().elementAt(0);
      if(!segment.getLocation().isComplement())
        mergedRanges.add(firstRange);
      else
        // complement segments are prepended, using their total range
        mergedRanges.insertElementAt(segment.getLocation().getTotalRange(), 0);
    }

    // key and strand of the merged feature come from the first group member
    final GFFStreamFeature firstMember = members.get(0);
    final boolean onComplement = firstMember.getLocation().isComplement();
    final Location mergedLocation = new Location(mergedRanges, onComplement);

    mergedQualifiers = mergeQualifiers(mergedQualifiers);
    final Qualifier geneId = mergedQualifiers.getQualifierByName("gene_id");
    if(geneId != null)
      mergedQualifiers.addQualifierValues(
          new Qualifier("ID", keyStr+":"+geneId.getValues().get(0)));

    forcedAdd(new GFFStreamFeature(firstMember.getKey(), mergedLocation, mergedQualifiers));
  }
}
/**
* Get the phase/codon_start for the first feature segment
* @param gffFeatures the segments to search; each one is cast to
* GFFStreamFeature.
* @param isComplement if true the segment with the largest first base is
* picked, otherwise the one with the smallest first base.
* @return a copy of the "codon_start" qualifier of the picked segment, or
* null if no segment was picked, it has no such qualifier, or an
* InvalidRelationException was raised while reading it.
*/
private Qualifier getCodonStart(final List<Feature> gffFeatures, final boolean isComplement)
{
Feature firstFeature = null;
// NOTE(review): fstart is not declared in the visible lines; which segment
// wins the comparisons below depends on its initial value - confirm
for(Feature f: gffFeatures)
{
final GFFStreamFeature this_feature = (GFFStreamFeature)f;
if(isComplement && this_feature.getFirstBase() > fstart)
{
firstFeature = this_feature;
fstart = this_feature.getFirstBase();
}
else if(!isComplement && this_feature.getFirstBase() < fstart)
{
firstFeature = this_feature;
fstart = this_feature.getFirstBase();
}
}
if(firstFeature == null)
return null;
try
{
Qualifier codon_start = firstFeature.getQualifierByName("codon_start");
if(codon_start != null)
return codon_start.copy();
}
// the exception is ignored on purpose or by omission - either way the
// method then falls through and returns null
catch (InvalidRelationException e){}
return null;
}
/**
* Adjust feature coordinates to match the contig positions when loaded
* with a multiple fasta.
* @param sequenceEntry sequence entry
*/
public void adjustCoordinates(uk.ac.sanger.artemis.Entry sequenceEntry)
{
final Entry entry;
if(sequenceEntry != null)
entry = sequenceEntry.getEMBLEntry();
else
entry = this;
if(entry instanceof SimpleDocumentEntry)
{
// adjust feature coordinates to match contig positions
final Hashtable<String, Range> contig_ranges = ((SimpleDocumentEntry)entry).contig_ranges;
if(contig_ranges != null)
{
final FeatureVector gff_regions = getAllFeatures();
final Enumeration<Feature> gff_features = gff_regions.elements();
while(gff_features.hasMoreElements())
{
final Feature f = gff_features.nextElement();
if( !(f instanceof GFFStreamFeature) )
continue;
final String name = ((GFFStreamFeature)f).getGffSeqName();
if(name == null)
continue;
if(contig_ranges.containsKey(name))
{
try
{
final Range new_range = contig_ranges.get(name);
final RangeVector new_ranges = new RangeVector();
final RangeVector ranges = f.getLocation().getRanges();
for(int i = 0 ; i<ranges.size () ; ++i)
{
final Range r = (Range)ranges.elementAt(i);
new_ranges.add(new Range(r.getStart()+new_range.getStart()-1,
r.getEnd()+new_range.getStart()-1));
}
Location l = new Location(new_ranges, f.getLocation().isComplement());
f.setLocation(l);
((uk.ac.sanger.artemis.Feature)f.getUserData()).setLocation(l);