/*
 *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
package uk.ac.sanger.artemis.util;
import uk.ac.sanger.artemis.io.ChadoCanonicalGene;
import uk.ac.sanger.artemis.io.ReadFormatException;
import uk.ac.sanger.artemis.chado.IBatisDAO;
import uk.ac.sanger.artemis.chado.JdbcDAO;
import uk.ac.sanger.artemis.chado.GmodDAO;
import uk.ac.sanger.artemis.chado.ChadoTransaction;
import uk.ac.sanger.artemis.components.database.DatabaseEntrySource;
import org.gmod.schema.sequence.Feature;
import org.gmod.schema.sequence.FeatureProp;
import org.gmod.schema.sequence.FeatureLoc;
import org.gmod.schema.sequence.FeatureRelationship;
import org.gmod.schema.sequence.FeatureSynonym;
import org.gmod.schema.sequence.FeatureCvTerm;
import org.gmod.schema.sequence.FeatureCvTermProp;
import org.gmod.schema.general.DbXRef;
/**
 * Objects of this class are Documents created from a relational database.
*
*/
/** database schema */
private String schema = "public";
private static Hashtable cvterms;
private InputStreamProgressListener progress_listener;
/** JDBC DAO */
private JdbcDAO jdbcDAO = null;
/** iBatis DAO */
private IBatisDAO connIB = null;
private String[] types = { "exon", "gene", "CDS", "transcript" };
private List schema_list;
private boolean gene_builder;
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
 *            jdbc:postgresql://host:port/database_name?user=username
*
*/
public DatabaseDocument(String location, JPasswordField pfield)
if(location.indexOf('=') > -1)
this.schema = location.substring( location.indexOf('=')+ 1);
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* @param feature_id
* ID of a feature to be extracted.
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
 *            jdbc:postgresql://host:port/database_name?user=username
* @param feature_id
* ID of a feature to be extracted.
* @param splitGFFEntry
* split into separate entries based on feature types.
* @param progress_listener
* input stream progress listener
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
InputStreamProgressListener progress_listener)
{
super(location);
this.progress_listener = progress_listener;
/**
* Used by the gene builder to read a database entry
* for a single gene.
* @param location
* @param pfield
* @param feature_id
* @param schema
* @param gene_builder
*/
public DatabaseDocument(String location, JPasswordField pfield,
String feature_id, String schema, boolean gene_builder)
{
super(location);
this.pfield = pfield;
this.feature_id = feature_id;
this.schema = schema;
this.gene_builder = gene_builder;
if(System.getProperty("ibatis") != null)
{
iBatis = true;
System.setProperty("chado", location);
}
}
public DatabaseDocument(String location, JPasswordField pfield,
/**
 * Point this document at a different schema, dropping any cached DAO
 * connections so that they are re-created against the new schema.
 * @param location the current document location string (ends with "=schema")
 * @param schema   the name of the schema to switch to
 */
private void reset(String location, String schema)
{
  this.schema = schema;

  // nothing to do when the location already names this schema
  if(location.endsWith("=" + schema))
    return;

  final int eq = location.lastIndexOf('=');
  setLocation(location.substring(0, eq + 1) + schema);

  // discard cached DAOs so the next access reconnects with the new schema
  connIB  = null;
  jdbcDAO = null;
  System.setProperty("chado", (String)getLocation());
}
return new DatabaseDocument( ((String)getLocation()) + name, pfield);
* Return the name of this Document (the last element of the Document
* location).
*/
public String getName()
int ind = ((String) getLocation()).indexOf("?");
String name = ((String) getLocation()).substring(0, ind);
/**
 * Set the name of this document, replacing the value returned by
 * <code>getName()</code>.
 * @param name the new name for this document
 */
public void setName(String name)
{
  this.name = name;
}
public DatabaseDocument createDatabaseDocument()
return new DatabaseDocument( (String)getLocation(), pfield,
feature_id, schema );
 * Return true if and only if the Document referred to by this object exists
 * and is readable. Always returns true.
*/
public boolean readable()
 * Return true if and only if the Document referred to by this object exists
 * and can be written to. Always returns false.
*/
public boolean writable()
* Create a new InputStream object from this Document. The contents of the
* Document can be read from the InputStream.
*
* @exception IOException
* Thrown if the Document can't be read from (for example if it
* doesn't exist).
*/
public InputStream getInputStream() throws IOException
ByteArrayInputStream instream;
if(gff_buff != null)
{
instream = new ByteArrayInputStream(gff_buff.getBytes());
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).startTransaction();
// if creating a gene builder
if(gene_builder)
{
List schemaList = new Vector();
schemaList.add(schema);
return new ByteArrayInputStream(getGeneFeature(feature_id,
schemaList, dao).getBytes());
}
gff_buffer = getGff(dao, feature_id);
if(splitGFFEntry)
if(gff_buffer[0].size() > 0)
entry.append(gff_buffer[0]);
getChadoSequence(dao, entry);
}
else
{
for(int i = 0; i < gff_buffer.length; i++)
{
if(gff_buffer[i].size() > 0)
entry.append(gff_buffer[i]);
}
getChadoSequence(dao, entry);
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).commitTransaction();
}
finally
{
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).endTransaction();
}
instream = new ByteArrayInputStream(entry.getBytes());
catch(RuntimeException re)
{
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
re.getMessage(),
"Problems Reading From the Database ",
JOptionPane.ERROR_MESSAGE);
re.printStackTrace();
}
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
sqlExp.getMessage(),
"Problems Reading From the Database ",
sqlExp.printStackTrace();
}
return null;
}
*
* Called (by DatabaseEntrySource) to retrieve all the documents for each
* entry created.
*
*/
public DatabaseDocument[] getGffDocuments(String location, String id,
String schema)
{
if(gff_buffer[i].size() > 0)
nentries++;
}
DatabaseDocument[] new_docs = new DatabaseDocument[nentries];
nentries = 0;
String name;
if(i >= types.length)
name = "other";
else
name = types[i];
new_docs[nentries] = new DatabaseDocument(location, pfield, id, schema,
gff_buffer[i], name);
* Create an array of GFF lines.
* @param dao the data access object
* @param parentFeatureID the parent identifier for the features to
* extract
* @return the <code>ByteBuffer</code> array of GFF lines
private ByteBuffer[] getGff(GmodDAO dao, String parentFeatureID)
Feature srcFeature = new Feature();
srcFeature.setFeatureId(srcfeature_id);
featureloc.setFeatureBySrcFeatureId(srcFeature);
//featureloc.setSrcfeature_id(srcfeature_id);
List featList = dao.getFeaturesByLocatedOnFeature(parent);
ByteBuffer[] buffers = new ByteBuffer[types.length + 1];
for(int i = 0; i < buffers.length; i++)
final Feature parentFeature = dao.getFeatureById(srcfeature_id);
Hashtable id_store = new Hashtable(feature_size);
for(int i = 0; i < feature_size; i++)
{
dao.getFeatureDbXRefsByFeatureUniquename(null));
Hashtable synonym = getAllFeatureSynonyms(dao, null);
Hashtable featureCvTerms = getFeatureCvTermsByFeature(dao);
if(featureCvTerms != null)
System.out.println("\n\n"+featureCvTerms.size()+"\n\n");
long type_id = feat.getCvTerm().getCvTermId();
String typeName = getCvtermName(type_id, dao);
{
if(types[j].equals(typeName))
this_buff = buffers[j];
}
dbxrefs, synonym, featureCvTerms,
if( i%10 == 0 || i == feature_size-1)
progress_listener.progressMade("Read from database: " +
* Get a <code>Hashtable</code> of feature_id keys and their corresponding
* feature_synonym
private Hashtable getAllFeatureSynonyms(final GmodDAO dao,
List list = dao.getFeatureSynonymsByFeatureUniquename(uniquename);
Hashtable synonym = new Hashtable();
Integer feature_id;
if(synonym.containsKey(feature_id))
value = (Vector)synonym.get(feature_id);
else
value = new Vector();
value.add(alias);
synonym.put(feature_id, value);
}
return synonym;
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
}
/**
 * Build a lookup of feature_id to the feature_cvterms attached to that
 * feature.
 * @param dao the data access object
 * @return <code>Hashtable</code> of <code>Integer</code> feature_id keys
 *         to <code>Vector</code>s of <code>FeatureCvTerm</code>s
 */
private Hashtable getFeatureCvTermsByFeature(final GmodDAO dao)
{
  // NOTE(review): a null argument appears to fetch the feature_cvterms
  // for every feature — confirm against the DAO implementation
  final List list = dao.getFeatureCvTermsByFeature(null);
  final Hashtable featureCvTerms = new Hashtable();

  for(Iterator it = list.iterator(); it.hasNext(); )
  {
    final FeatureCvTerm feature_cvterm = (FeatureCvTerm)it.next();
    final Integer key =
        new Integer(feature_cvterm.getFeature().getFeatureId());

    // group the feature_cvterms by their owning feature_id
    Vector values = (Vector)featureCvTerms.get(key);
    if(values == null)
      values = new Vector();
    values.add(feature_cvterm);
    featureCvTerms.put(key, values);
  }
  return featureCvTerms;
}
/**
 * Used by the gene editor to retrieve the gene and related
* features
* @param search_gene gene uniquename
* @param schema_search schema list to search
* @param dao data access method
* @return GFF byte buffer
* @throws SQLException
* @throws ReadFormatException
private ByteBuffer getGeneFeature(final String search_gene,
final List schema_search,
throws SQLException, ReadFormatException, ConnectException
{
Hashtable id_store = new Hashtable();
reset((String)getLocation(), (String)schema_search.get(0));
dao = getDAO();
Feature feature =
(Feature)dao.getFeatureByUniqueName(search_gene);
ChadoCanonicalGene chado_gene = new ChadoCanonicalGene();
id_store.put(Integer.toString(feature.getFeatureId()), feature.getUniqueName());
List featurelocs = new Vector(feature.getFeatureLocsForFeatureId());
FeatureLoc featureloc = (FeatureLoc) featurelocs.get(0);
int src_id = featureloc.getFeatureBySrcFeatureId().getFeatureId();
parent = dao.getFeatureById(src_id); //.getLazyFeature(parent);
chado_gene.setSeqlen(parent.getSeqLen().intValue());
ByteBuffer buff = new ByteBuffer();
chadoToGFF(feature, null, null, null, null, null, dao,
featureloc, buff);
// get children of gene
List relations = new Vector(feature.getFeatureRelationshipsForObjectId());
for(int i = 0; i < relations.size(); i++)
{
int id = ((FeatureRelationship) relations.get(i)).getFeatureBySubjectId().getFeatureId();
Feature transcript =
(Feature)dao.getFeatureById(id); //.getLazyFeature(transcript);
FeatureLoc loc = getFeatureLoc(new Vector(
transcript.getFeatureLocsForFeatureId()), src_id);
chadoToGFF(transcript, feature.getUniqueName(), null,
null, null, id_store, dao, loc, buff);
// get children of transcript - exons and pp
List transcipt_relations = new Vector(
transcript.getFeatureRelationshipsForObjectId());
for(int j = 0; j < transcipt_relations.size(); j++)
{
id = ((FeatureRelationship) transcipt_relations.get(j)).getFeatureBySubjectId().getFeatureId();
//Feature child = new Feature();
//child.setId(((FeatureRelationship) transcipt_relations.get(j))
Feature child =
(Feature)dao.getFeatureById(id); //dao.getLazyFeature(child);
id_store.put(Integer.toString(child.getFeatureId()), child.getUniqueName());
new Vector(child.getFeatureLocsForFeatureId()),src_id);
chadoToGFF(child, transcript.getUniqueName(), null,
null, null, id_store, dao, loc, buff);
}
}
return buff;
}
/**
* Convert the chado feature into a GFF line
* @param feat Chado feature
* @param parentFeature parent of this feature
* @param dbxrefs hashtable containing dbxrefs
 * @param synonym hashtable containing synonyms
* @param id_store id store for looking up parent names
* @param dao chado data access
* @param featureloc feature location for this chado feature
private static void chadoToGFF(final Feature feat,
final String parentFeature,
final Hashtable dbxrefs,
final Hashtable synonym,
final Hashtable featureCvTerms,
final Hashtable id_store,
final ByteBuffer this_buff)
final int fmin = featureloc.getFmin().intValue() + 1;
final int fmax = featureloc.getFmax().intValue();
final long type_id = feat.getCvTerm().getCvTermId();
final Short strand = featureloc.getStrand();
final Integer phase = featureloc.getPhase();
final String name = feat.getUniqueName();
final String typeName = getCvtermName(type_id, dao);
final Integer feature_id = new Integer(feat.getFeatureId());
final String timelastmodified = Long.toString(feat.getTimeLastModified().getTime());
String parent_id = null;
String parent_relationship = null;
/* if(feat.getFeatureRelationship() != null)
FeatureRelationship feat_relationship = feat.getFeatureRelationship();
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
long parent_type_id = feat_relationship.getCvTerm().getCvTermId();
parent_relationship = feat_relationship.getCvTerm().getName();
if(parent_relationship == null)
parent_relationship = getCvtermName(parent_type_id, dao);
}
else */
if(feat.getFeatureRelationshipsForSubjectId() != null)
List relations = new Vector(feat.getFeatureRelationshipsForSubjectId());
FeatureRelationship feat_relationship =
(FeatureRelationship)relations.get(i);
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
if( feat_relationship.getCvTerm().getName() == null )
{
long parent_type_id = feat_relationship.getCvTerm().getCvTermId();
parent_relationship = getCvtermName(parent_type_id, dao);
}
else
parent_relationship = feat_relationship.getCvTerm().getName();
}
}
if(parent_id != null && id_store != null && id_store.containsKey(parent_id))
parent_id = (String)id_store.get(parent_id);
// make gff format
Vector dbxref = null;
// append dbxrefs
if(dbxrefs != null &&
dbxrefs.containsKey(feature_id))
dbxref = (Vector)dbxrefs.get(feature_id);
if(((String)dbxref.get(j)).startsWith("GFF_source:"))
gff_source = ((String)dbxref.get(j)).substring(11);
dbxref.removeElementAt(j);
this_buff.append(parentFeature + "\t"); // seqid
if(gff_source != null)
this_buff.append(gff_source+"\t"); // source
else
this_buff.append("chado\t");
this_buff.append(typeName + "\t"); // type
this_buff.append(fmin + "\t"); // start
this_buff.append(fmax + "\t"); // end
this_buff.append(".\t"); // score
if(strand.equals( new Short((short)-1)) ) // strand
else if(strand.equals( new Short((short)1)) )
this_buff.append("+\t");
else
this_buff.append(".\t");
this_buff.append(".\t"); // phase
else
this_buff.append(phase+"\t");
this_buff.append("ID=" + name + ";");
if(parent_id != null && !parent_id.equals("0"))
{
if(parent_relationship.equals("derives_from"))
this_buff.append("Derives_from=" + parent_id + ";");
this_buff.append("Parent=" + parent_id + ";");
}
this_buff.append("timelastmodified=" + timelastmodified + ";");
// this is the chado feature_relationship.rank used
// to order features e.g. exons
if(rank > -1)
this_buff.append("feature_relationship_rank="+rank+";");
//this_buff.append("feature_id="+feature_id+";");
if(feat.getFeatureProps() != null &&
feat.getFeatureProps().size() > 0)
List featureprops = (List)feat.getFeatureProps();
for(int j=0; j<featureprops.size(); j++)
FeatureProp featprop = (FeatureProp)featureprops.get(j);
String qualifier_name = getCvtermName(featprop.getCvTerm().getCvTermId(), dao);
if(featprop.getValue() != null)
this_buff.append(qualifier_name+ "=" +
GFFStreamFeature.encode(featprop.getValue())+";");
// append dbxrefs
if(dbxref != null && dbxref.size() > 0)
{
this_buff.append("Dbxref=");
for(int j=0; j<dbxref.size(); j++)
this_buff.append((String)dbxref.get(j));
if(j<dbxref.size()-1)
this_buff.append(",");
this_buff.append(";");
}
// append synonyms
if(synonym != null &&
synonym.containsKey(feature_id))
Vector v_synonyms = (Vector)synonym.get(feature_id);
this_buff.append( getCvtermName(alias.getSynonym().getCvTerm().getCvTermId(), dao) + "=" );
//this_buff.append(alias.getSynonym().getCvterm().getName()+"=");
this_buff.append(alias.getSynonym().getName());
if(j<v_synonyms.size()-1)
this_buff.append(";");
if(featureCvTerms != null &&
featureCvTerms.containsKey(feature_id))
{
FeatureCvTerm feature_cvterm;
Vector v_feature_cvterms = (Vector)featureCvTerms.get(feature_id);
for(int j=0; j<v_feature_cvterms.size(); j++)
{
feature_cvterm = (FeatureCvTerm)v_feature_cvterms.get(j);
CvTerm cvterm = getCvTerm( feature_cvterm.getCvTerm().getCvTermId(), dao);
DbXRef dbXRef = feature_cvterm.getCvTerm().getDbXRef();
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
if(cvterm.getCv().getName().equals("genedb_controlledcuration"))
{
int cvtermId = feature_cvterm.getCvTerm().getCvTermId();
String cvName = getCvTerm(cvtermId, dao).getCv().getName();
this_buff.append("controlled_curation=");
this_buff.append("cv="+cvName+"%3B");
this_buff.append("term="+feature_cvterm.getCvTerm().getName()+"%3B");
this_buff.append("db_xref="+dbXRef.getDb().getName() + ":"
+ dbXRef.getAccession() + "%3B");
List feature_cvtermprops = (List)feature_cvterm.getFeatureCvTermProps();
for(int i=0; i<feature_cvtermprops.size(); i++)
{
FeatureCvTermProp feature_cvtermprop = (FeatureCvTermProp)feature_cvtermprops.get(i);
this_buff.append(getCvtermName(feature_cvtermprop.getCvTerm().getCvTermId(), dao));
this_buff.append("=");
this_buff.append(feature_cvtermprop.getValue());
if(i<feature_cvtermprops.size())
this_buff.append("%3B");
}
this_buff.append(";");
}
else
{
this_buff.append("GO=");
if(cvterm.getCv().getName().equals("molecular_function"))
this_buff.append("aspect=F%3B");
else if(cvterm.getCv().getName().equals("cellular_component"))
this_buff.append("aspect=C%3B");
else if(cvterm.getCv().getName().equals("biological_process"))
this_buff.append("aspect=P%3B");
if(feature_cvterm.isNot())
this_buff.append("qualifier=NOT%3B");
this_buff.append("GOid="+dbXRef.getDb().getName() + ":"
+ dbXRef.getAccession() + ";");
}
}
//System.out.println(new String(this_buff.getBytes()));
}
/**
* Look up the cvterm_id for a controlled vocabulary name.
* @param name
* @return
*/
Enumeration enum_cvterm = cvterms.keys();
while(enum_cvterm.hasMoreElements())
{
Long key = (Long)enum_cvterm.nextElement();
if(name.equals( ((CvTerm)cvterms.get(key)).getName() ))
* Look up a cvterm name from the collection of cvterms.
* @param id a cvterm_id
* @return the cvterm name
/**
 * Look up a cvterm name from the collection of cvterms.
 * @param id  a cvterm_id
 * @param dao the data access object, passed through to the cvterm lookup
 * @return the cvterm name
 */
private static String getCvtermName(long id, GmodDAO dao)
{
  final CvTerm term = getCvTerm(id, dao);
  return term.getName();
}
private static CvTerm getCvTerm(long id, GmodDAO dao)
return (CvTerm)cvterms.get(new Long(id));
* @param dao the data access object
* @return the cvterm <code>Hashtable</code>
private static Hashtable getCvterms(GmodDAO dao)
cvterms = new Hashtable();
List cvterm_list = dao.getCvTerms();
Iterator it = cvterm_list.iterator();
CvTerm cvterm = (CvTerm)it.next();
cvterms.put(new Long(cvterm.getCvTermId()), cvterm);
catch(RuntimeException sqle)
System.err.println("SQLException retrieving CvTerms");
/**
 * Look up synonym type names, e.g. synonym, systematic_id, by scanning
 * the cached cvterms for those belonging to the named controlled
 * vocabulary.
 * @param cv_name the name of the controlled vocabulary to select terms from
 * @return the names of the cvterms in that CV (the synonym tag names)
 */
public static String[] getSynonymTypeNames(String cv_name)
{
  final Vector names = new Vector();

  for(Enumeration en = cvterms.elements(); en.hasMoreElements(); )
  {
    final CvTerm term = (CvTerm)en.nextElement();
    if(term.getCv().getName().equals(cv_name))
      names.add(term.getName());
  }

  return (String[])names.toArray(new String[names.size()]);
}
/**
* Get the sequence for a feature.
* @param dao the data access object
* @param buff the buffer to add the sequence to
* @return the resulting buffer
* @throws java.sql.SQLException
*/
private ByteBuffer getChadoSequence(GmodDAO dao, ByteBuffer buff)