Newer
Older
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
package uk.ac.sanger.artemis.util;
import uk.ac.sanger.artemis.io.ChadoCanonicalGene;
import uk.ac.sanger.artemis.io.ReadFormatException;
import uk.ac.sanger.artemis.chado.IBatisDAO;
import uk.ac.sanger.artemis.chado.JdbcDAO;
import uk.ac.sanger.artemis.chado.GmodDAO;
import uk.ac.sanger.artemis.chado.ChadoTransaction;
import uk.ac.sanger.artemis.components.database.DatabaseEntrySource;
import org.gmod.schema.sequence.Feature;
import org.gmod.schema.sequence.FeatureProp;
import org.gmod.schema.sequence.FeatureLoc;
import org.gmod.schema.sequence.FeatureRelationship;
import org.gmod.schema.sequence.FeatureSynonym;
import org.gmod.schema.sequence.FeatureCvTerm;
import org.gmod.schema.sequence.FeatureCvTermProp;
import org.gmod.schema.general.DbXRef;
import org.gmod.schema.pub.PubDbXRef;
import org.gmod.schema.pub.Pub;
* Objects of this class are Documents created from a relational database.
*
*/
/** source feature_id */
private String srcFeatureId = "1";
/** database schema */
private String schema = "public";
private static Hashtable cvterms;
private InputStreamProgressListener progress_listener;
/** JDBC DAO */
private JdbcDAO jdbcDAO = null;
/** iBatis DAO */
private IBatisDAO connIB = null;
private String[] types = { "exon", "gene", "CDS", "transcript" };
private List schema_list;
private boolean gene_builder;
// controlled vocabulary
/** controlled_curation controlled vocabulary */
public static String RILEY_TAG_CVNAME = "RILEY";
private static org.apache.log4j.Logger logger4j =
org.apache.log4j.Logger.getLogger(DatabaseDocument.class);
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* jdbc:postgresql://host:port/datbase_name?user=username
*
*/
public DatabaseDocument(String location, JPasswordField pfield)
if(location.indexOf('=') > -1)
this.schema = location.substring( location.indexOf('=')+ 1);
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* @param feature_id
* ID of a feature to be extracted.
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
String srcFeatureId, String schema)
this.srcFeatureId = srcFeatureId;
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* jdbc:postgresql://host:port/datbase_name?user=username
* @param srcFeatureId
* ID of a feature to be extracted.
* @param splitGFFEntry
* split into separate entries based on feature types.
* @param progress_listener
* input stream progress listener
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
String srcFeatureId, String schema, boolean splitGFFEntry,
InputStreamProgressListener progress_listener)
{
super(location);
this.srcFeatureId = srcFeatureId;
this.progress_listener = progress_listener;
/**
* Used by the gene builder to read a database entry
* for a single gene.
* @param location
* @param pfield
* @param srcFeatureId
* @param schema
* @param gene_builder
*/
public DatabaseDocument(String location, JPasswordField pfield,
String srcFeatureId, String schema, boolean gene_builder)
{
super(location);
this.pfield = pfield;
this.srcFeatureId = srcFeatureId;
this.schema = schema;
this.gene_builder = gene_builder;
if(System.getProperty("ibatis") != null)
{
iBatis = true;
System.setProperty("chado", location);
}
}
public DatabaseDocument(String location, JPasswordField pfield,
String srcFeatureId, String schema,
this.srcFeatureId = srcFeatureId;
/**
* Reset the schema.
* @param location
* @param schema
*/
private void reset(String location, String schema)
{
this.schema = schema;
if(!location.endsWith("="+schema))
{
int index = location.lastIndexOf('=');
setLocation(location.substring(0,index+1) + schema);
connIB = null;
jdbcDAO = null;
System.setProperty("chado", (String)getLocation());
return new DatabaseDocument( ((String)getLocation()) + name, pfield);
* Return the name of this Document (the last element of the Document
* location).
*/
public String getName()
int ind = ((String) getLocation()).indexOf("?");
String name = ((String) getLocation()).substring(0, ind);
/**
* Set the name of this document.
*/
public void setName(String name)
{
this.name = name;
}
public DatabaseDocument createDatabaseDocument()
return new DatabaseDocument( (String)getLocation(), pfield,
srcFeatureId, schema );
* Return true if and only if the Document refered to by this object exists
* and is readable. Always returns true.
*/
public boolean readable()
* Return true if and only if the Document refered to by this object exists
* and can be written to. Always returns false.
*/
public boolean writable()
* Create a new InputStream object from this Document. The contents of the
* Document can be read from the InputStream.
*
* @exception IOException
* Thrown if the Document can't be read from (for example if it
* doesn't exist).
*/
public InputStream getInputStream() throws IOException
ByteArrayInputStream instream;
if(gff_buff != null)
{
instream = new ByteArrayInputStream(gff_buff.getBytes());
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).startTransaction();
// if creating a gene builder
if(gene_builder)
{
List schemaList = new Vector();
schemaList.add(schema);
return new ByteArrayInputStream(getGeneFeature(srcFeatureId,
gff_buffer = getGff(dao);
if(gff_buffer[0].size() > 0)
entry.append(gff_buffer[0]);
getChadoSequence(dao, entry);
}
else
{
for(int i = 0; i < gff_buffer.length; i++)
{
if(gff_buffer[i].size() > 0)
entry.append(gff_buffer[i]);
}
getChadoSequence(dao, entry);
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).commitTransaction();
}
finally
{
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).endTransaction();
}
instream = new ByteArrayInputStream(entry.getBytes());
catch(RuntimeException re)
{
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
re.getMessage(),
"Problems Reading From the Database ",
JOptionPane.ERROR_MESSAGE);
re.printStackTrace();
}
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
sqlExp.getMessage(),
"Problems Reading From the Database ",
sqlExp.printStackTrace();
}
return null;
}
*
* Called (by DatabaseEntrySource) to retrieve all the documents for each
* entry created.
*
*/
public DatabaseDocument[] getGffDocuments(String location, String id,
String schema)
{
if(gff_buffer[i].size() > 0)
nentries++;
}
DatabaseDocument[] new_docs = new DatabaseDocument[nentries];
nentries = 0;
String name;
if(i >= types.length)
name = "other";
else
name = types[i];
new_docs[nentries] = new DatabaseDocument(location, pfield, id, schema,
gff_buffer[i], name);
* Create an array of GFF lines.
* @param dao the data access object
* @param parentFeatureID the parent identifier for the features to
* extract
* @return the <code>ByteBuffer</code> array of GFF lines
private ByteBuffer[] getGff(GmodDAO dao)
final int srcfeature_id = Integer.parseInt(srcFeatureId);
Feature srcFeature = new Feature();
srcFeature.setFeatureId(srcfeature_id);
featureloc.setFeatureBySrcFeatureId(srcFeature);
//featureloc.setSrcfeature_id(srcfeature_id);
List featList = dao.getFeaturesByLocatedOnFeature(parent);
ByteBuffer[] buffers = new ByteBuffer[types.length + 1];
for(int i = 0; i < buffers.length; i++)
final Feature parentFeature = dao.getFeatureById(srcfeature_id);
Hashtable id_store = new Hashtable(feature_size);
for(int i = 0; i < feature_size; i++)
{
String featureId = Integer.toString(feat.getFeatureId());
id_store.put(featureId, name);
dao.getFeatureDbXRefsByFeatureUniquename(null));
Hashtable synonym = getAllFeatureSynonyms(dao, null);
Hashtable featureCvTerms = getFeatureCvTermsByFeature(dao, null);
Hashtable featureCvTermDbXRefs = getFeatureCvTermDbXRef(dao, null);
Hashtable featureCvTermPubs = getFeatureCvTermPub(dao, null);
String typeName = getCvtermName(type_id, dao);
{
if(types[j].equals(typeName))
this_buff = buffers[j];
}
dbxrefs, synonym, featureCvTerms,
pubDbXRefs, featureCvTermDbXRefs, featureCvTermPubs,
id_store, dao,
if( i%10 == 0 || i == feature_size-1)
progress_listener.progressMade("Read from database: " +
* Get a <code>Hashtable</code> of feature_id keys and their corresponding
* feature_synonym
private Hashtable getAllFeatureSynonyms(final GmodDAO dao,
List list = dao.getFeatureSynonymsByFeatureUniquename(uniquename);
Integer featureId;
featureId = new Integer(alias.getFeature().getFeatureId());
if(synonym.containsKey(featureId))
value = (Vector)synonym.get(featureId);
else
value = new Vector();
value.add(alias);
synonym.put(featureId, value);
/**
*
* @param dao
* @param chadoFeature null if we want them all
* @return
*/
private Hashtable getFeatureCvTermsByFeature(final GmodDAO dao,
final Feature chadoFeature)
Hashtable featureCvTerms = new Hashtable();
Integer featureId;
List value;
FeatureCvTerm feature_cvterm;
for(int i=0; i<list.size(); i++)
{
feature_cvterm = (FeatureCvTerm)list.get(i);
featureId = new Integer(feature_cvterm.getFeature().getFeatureId());
if(featureCvTerms.containsKey(featureId))
value = (Vector)featureCvTerms.get(featureId);
else
value = new Vector();
value.add(feature_cvterm);
featureCvTerms.put(featureId, value);
}
return featureCvTerms;
}
/**
*
* @param dao
* @param chadoFeature null if we want all
* @return
*/
private Hashtable getFeatureCvTermDbXRef(final GmodDAO dao, final Feature chadoFeature)
List list = dao.getFeatureCvTermDbXRefByFeature(chadoFeature);
Hashtable featureCvTermDbXRefs = new Hashtable(list.size());
for(int i=0; i<list.size(); i++)
{
FeatureCvTermDbXRef featureCvTermDbXRef =
(FeatureCvTermDbXRef)list.get(i);
featureCvTermDbXRefId = new Integer(
featureCvTermDbXRef.getFeatureCvTerm().getFeatureCvTermId());
if(featureCvTermDbXRefs.containsKey(featureCvTermDbXRefId))
value = (Vector)featureCvTermDbXRefs.get(featureCvTermDbXRefId);
else
value = new Vector();
value.add(featureCvTermDbXRef);
featureCvTermDbXRefs.put(featureCvTermDbXRefId, value);
private Hashtable getFeatureCvTermPub(final GmodDAO dao,
final Feature chadoFeature)
List list = dao.getFeatureCvTermPubByFeature(chadoFeature);
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
if(list == null || list.size() == 0)
return null;
Integer featureCvTermId;
List value;
Hashtable featureCvTermPubs = new Hashtable(list.size());
for(int i=0; i<list.size(); i++)
{
FeatureCvTermPub featureCvTermPub =
(FeatureCvTermPub)list.get(i);
featureCvTermId = new Integer(
featureCvTermPub.getFeatureCvTerm().getFeatureCvTermId());
if(featureCvTermPubs.containsKey(featureCvTermId))
value = (Vector)featureCvTermPubs.get(featureCvTermId);
else
value = new Vector();
value.add(featureCvTermPub);
featureCvTermPubs.put(featureCvTermId, value);
}
return featureCvTermPubs;
}
/**
* Use by the gene editor to retrieve the gene and related
* features
* @param search_gene gene uniquename
* @param schema_search schema list to search
* @param dao data access method
* @return GFF byte buffer
* @throws SQLException
* @throws ReadFormatException
private ByteBuffer getGeneFeature(final String search_gene,
final List schema_search,
throws SQLException, ReadFormatException, ConnectException
{
Hashtable id_store = new Hashtable();
reset((String)getLocation(), (String)schema_search.get(0));
dao = getDAO();
(Feature)(dao.getFeaturesByUniqueName(search_gene).get(0));
ChadoCanonicalGene chado_gene = new ChadoCanonicalGene();
id_store.put(Integer.toString(chadoFeature.getFeatureId()),
chadoFeature.getUniqueName());
List featurelocs = new Vector(chadoFeature.getFeatureLocsForFeatureId());
FeatureLoc featureloc = (FeatureLoc) featurelocs.get(0);
int src_id = featureloc.getSrcFeatureId();
srcFeatureId = Integer.toString(src_id);
ByteBuffer buff = new ByteBuffer();
buildGffLineFromId(dao, chadoFeature.getFeatureId(),
id_store, parent.getUniqueName(), src_id, buff, chadoFeature);
// get children of gene
List relations = new Vector(chadoFeature.getFeatureRelationshipsForObjectId());
for(int i = 0; i < relations.size(); i++)
{
int id = ((FeatureRelationship) relations.get(i)).getFeatureBySubjectId().getFeatureId();
Feature transcript = buildGffLineFromId(dao, id, id_store, parent.getUniqueName(),
src_id, buff, null);
// get children of transcript - exons and pp
List transcipt_relations = new Vector(
transcript.getFeatureRelationshipsForObjectId());
for(int j = 0; j < transcipt_relations.size(); j++)
{
id = ((FeatureRelationship) transcipt_relations.get(j)).getFeatureBySubjectId().getFeatureId();
buildGffLineFromId(dao, id, id_store, parent.getUniqueName(),
src_id, buff, null);
private Feature buildGffLineFromId(final GmodDAO dao,
final int featureId,
final Hashtable id_store,
final String parentName,
final int srcFeatureId,
final ByteBuffer this_buff,
Feature chadoFeature)
{
if(chadoFeature == null)
chadoFeature = (Feature)dao.getFeatureById(featureId);
id_store.put(Integer.toString(chadoFeature.getFeatureId()),
chadoFeature.getUniqueName());
FeatureLoc loc = getFeatureLoc(new Vector(
chadoFeature.getFeatureLocsForFeatureId()), srcFeatureId);
Hashtable dbxrefs = IBatisDAO.mergeDbXRef(
dao.getFeatureDbXRefsByFeatureUniquename(chadoFeature.getUniqueName()));
Hashtable synonym = getAllFeatureSynonyms(dao, chadoFeature.getUniqueName());
Hashtable featureCvTerms = getFeatureCvTermsByFeature(dao, chadoFeature);
Hashtable featureCvTermDbXRefs = getFeatureCvTermDbXRef(dao, chadoFeature);
Hashtable featureCvTermPubs = getFeatureCvTermPub(dao, chadoFeature);
chadoToGFF(chadoFeature, parentName, dbxrefs, synonym, featureCvTerms,
null, featureCvTermDbXRefs, featureCvTermPubs, id_store, dao, loc, this_buff);
return chadoFeature;
}
/**
* Convert the chado feature into a GFF line
* @param feat Chado feature
* @param parentFeature parent of this feature
* @param dbxrefs hashtable containing dbxrefs
* @param synonym hashtable containing synonynms
* @param featureCvTerms
* @param pubDbXRefs
* @param featureCvTermDbXRefs
* @param id_store id store for looking up parent names
* @param dao chado data access
* @param featureloc feature location for this chado feature
private static void chadoToGFF(final Feature feat,
final String parentFeature,
final Hashtable dbxrefs,
final Hashtable synonym,
final Hashtable featureCvTerms,
final Hashtable id_store,
final ByteBuffer this_buff)
final int fmin = featureloc.getFmin().intValue() + 1;
final int fmax = featureloc.getFmax().intValue();
final int type_id = feat.getCvTerm().getCvTermId();
final Short strand = featureloc.getStrand();
final Integer phase = featureloc.getPhase();
final String name = feat.getUniqueName();
final String typeName = getCvtermName(type_id, dao);
final Integer featureId = new Integer(feat.getFeatureId());
final String timelastmodified = Long.toString(feat.getTimeLastModified().getTime());
String parent_id = null;
String parent_relationship = null;
/* if(feat.getFeatureRelationship() != null)
FeatureRelationship feat_relationship = feat.getFeatureRelationship();
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
long parent_type_id = feat_relationship.getCvTerm().getCvTermId();
parent_relationship = feat_relationship.getCvTerm().getName();
if(parent_relationship == null)
parent_relationship = getCvtermName(parent_type_id, dao);
}
else */
if(feat.getFeatureRelationshipsForSubjectId() != null)
List relations = new Vector(feat.getFeatureRelationshipsForSubjectId());
FeatureRelationship feat_relationship =
(FeatureRelationship)relations.get(i);
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
if( feat_relationship.getCvTerm().getName() == null )
{
parent_relationship = getCvtermName(parent_type_id, dao);
}
else
parent_relationship = feat_relationship.getCvTerm().getName();
}
}
if(parent_id != null && id_store != null && id_store.containsKey(parent_id))
parent_id = (String)id_store.get(parent_id);
// make gff format
Vector dbxref = null;
// append dbxrefs
if(dbxrefs != null &&
dbxrefs.containsKey(featureId))
dbxref = (Vector)dbxrefs.get(featureId);
if(((String)dbxref.get(j)).startsWith("GFF_source:"))
gff_source = ((String)dbxref.get(j)).substring(11);
dbxref.removeElementAt(j);
this_buff.append(parentFeature + "\t"); // seqid
if(gff_source != null)
this_buff.append(gff_source+"\t"); // source
else
this_buff.append("chado\t");
this_buff.append(typeName + "\t"); // type
this_buff.append(fmin + "\t"); // start
this_buff.append(fmax + "\t"); // end
this_buff.append(".\t"); // score
if(strand.equals( new Short((short)-1)) ) // strand
else if(strand.equals( new Short((short)1)) )
this_buff.append("+\t");
else
this_buff.append(".\t");
this_buff.append(".\t"); // phase
else
this_buff.append(phase+"\t");
this_buff.append("ID=" + name + ";");
this_buff.append("feature_id=" + featureId.toString() + ";");
if(parent_id != null && !parent_id.equals("0"))
{
if(parent_relationship.equals("derives_from"))
this_buff.append("Derives_from=" + parent_id + ";");
this_buff.append("Parent=" + parent_id + ";");
}
this_buff.append("timelastmodified=" + timelastmodified + ";");
// this is the chado feature_relationship.rank used
this_buff.append("feature_relationship_rank="+rank+";");
//this_buff.append("feature_id="+feature_id+";");
if(feat.getFeatureProps() != null &&
feat.getFeatureProps().size() > 0)
Collection featureprops = feat.getFeatureProps();
Iterator it = featureprops.iterator();
while(it.hasNext())
String qualifier_name = getCvtermName(featprop.getCvTerm().getCvTermId(), dao);
if(featprop.getValue() != null)
this_buff.append(qualifier_name+ "=" +
GFFStreamFeature.encode(featprop.getValue())+";");
// append dbxrefs
if(dbxref != null && dbxref.size() > 0)
{
this_buff.append("Dbxref=");
for(int j=0; j<dbxref.size(); j++)
this_buff.append((String)dbxref.get(j));
if(j<dbxref.size()-1)
this_buff.append(",");
this_buff.append(";");
}
// append synonyms
if(synonym != null &&
synonym.containsKey(featureId))
Vector v_synonyms = (Vector)synonym.get(featureId);
this_buff.append( getCvtermName(alias.getSynonym().getCvTerm().getCvTermId(), dao) + "=" );
//this_buff.append(alias.getSynonym().getCvterm().getName()+"=");
if(featureCvTerms != null &&
featureCvTerms.containsKey(featureId))
{
FeatureCvTerm feature_cvterm;
Vector v_feature_cvterms = (Vector)featureCvTerms.get(featureId);
for(int j=0; j<v_feature_cvterms.size(); j++)
{
feature_cvterm = (FeatureCvTerm)v_feature_cvterms.get(j);
Integer featureCvTermId = new Integer( feature_cvterm.getFeatureCvTermId() );
if(featureCvTermDbXRefs != null)
featureCvTermDbXRefList = (List)featureCvTermDbXRefs.get(featureCvTermId);
List featureCvTermPubList = null;
if(featureCvTermPubs != null)
featureCvTermPubList = (List)featureCvTermPubs.get(featureCvTermId);