Newer
Older
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
package uk.ac.sanger.artemis.util;
import uk.ac.sanger.artemis.io.ChadoCanonicalGene;
import uk.ac.sanger.artemis.io.ReadFormatException;
import uk.ac.sanger.artemis.chado.IBatisDAO;
import uk.ac.sanger.artemis.chado.JdbcDAO;
import uk.ac.sanger.artemis.chado.GmodDAO;
import uk.ac.sanger.artemis.chado.ChadoTransaction;
import uk.ac.sanger.artemis.components.database.DatabaseEntrySource;
import org.gmod.schema.sequence.Feature;
import org.gmod.schema.sequence.FeatureProp;
import org.gmod.schema.sequence.FeatureLoc;
import org.gmod.schema.sequence.FeatureRelationship;
import org.gmod.schema.sequence.FeatureSynonym;
import org.gmod.schema.sequence.FeatureCvTerm;
import org.gmod.schema.general.DbXRef;
* Objects of this class are Documents created from a relational database.
*
*/
/** database schema */
private String schema = "public";
private static Hashtable cvterms;
private InputStreamProgressListener progress_listener;
/** JDBC DAO */
private JdbcDAO jdbcDAO = null;
/** iBatis DAO */
private IBatisDAO connIB = null;
private String[] types = { "exon", "gene", "CDS", "transcript" };
private List schema_list;
private boolean gene_builder;
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* jdbc:postgresql://host:port/datbase_name?user=username
*
*/
public DatabaseDocument(String location, JPasswordField pfield)
if(location.indexOf('=') > -1)
this.schema = location.substring( location.indexOf('=')+ 1);
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* @param feature_id
* ID of a feature to be extracted.
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
*
* Create a new Document from a database.
*
* @param location
* This should be a URL string giving:
* jdbc:postgresql://host:port/datbase_name?user=username
* @param feature_id
* ID of a feature to be extracted.
* @param splitGFFEntry
* split into separate entries based on feature types.
* @param progress_listener
* input stream progress listener
*
*/
public DatabaseDocument(String location, JPasswordField pfield,
InputStreamProgressListener progress_listener)
{
super(location);
this.progress_listener = progress_listener;
/**
* Used by the gene builder to read a database entry
* for a single gene.
* @param location
* @param pfield
* @param feature_id
* @param schema
* @param gene_builder
*/
public DatabaseDocument(String location, JPasswordField pfield,
String feature_id, String schema, boolean gene_builder)
{
super(location);
this.pfield = pfield;
this.feature_id = feature_id;
this.schema = schema;
this.gene_builder = gene_builder;
if(System.getProperty("ibatis") != null)
{
iBatis = true;
System.setProperty("chado", location);
}
}
public DatabaseDocument(String location, JPasswordField pfield,
/**
* Reset the schema.
* @param location
* @param schema
*/
private void reset(String location, String schema)
{
this.schema = schema;
if(!location.endsWith("="+schema))
{
int index = location.lastIndexOf('=');
setLocation(location.substring(0,index+1) + schema);
connIB = null;
jdbcDAO = null;
System.setProperty("chado", (String)getLocation());
}
}
return new DatabaseDocument( ((String)getLocation()) + name, pfield);
* Return the name of this Document (the last element of the Document
* location).
*/
public String getName()
int ind = ((String) getLocation()).indexOf("?");
String name = ((String) getLocation()).substring(0, ind);
/**
* Set the name of this document.
*/
public void setName(String name)
{
this.name = name;
}
public DatabaseDocument createDatabaseDocument()
return new DatabaseDocument( (String)getLocation(), pfield,
feature_id, schema );
* Return true if and only if the Document refered to by this object exists
* and is readable. Always returns true.
*/
public boolean readable()
* Return true if and only if the Document refered to by this object exists
* and can be written to. Always returns false.
*/
public boolean writable()
* Create a new InputStream object from this Document. The contents of the
* Document can be read from the InputStream.
*
* @exception IOException
* Thrown if the Document can't be read from (for example if it
* doesn't exist).
*/
public InputStream getInputStream() throws IOException
ByteArrayInputStream instream;
if(gff_buff != null)
{
instream = new ByteArrayInputStream(gff_buff.getBytes());
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).startTransaction();
// if creating a gene builder
if(gene_builder)
{
List schemaList = new Vector();
schemaList.add(schema);
return new ByteArrayInputStream(getGeneFeature(feature_id,
schemaList, dao).getBytes());
}
gff_buffer = getGff(dao, feature_id);
if(splitGFFEntry)
if(gff_buffer[0].size() > 0)
entry.append(gff_buffer[0]);
getChadoSequence(dao, entry);
}
else
{
for(int i = 0; i < gff_buffer.length; i++)
{
if(gff_buffer[i].size() > 0)
entry.append(gff_buffer[i]);
}
getChadoSequence(dao, entry);
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).commitTransaction();
}
finally
{
if(dao instanceof IBatisDAO)
((IBatisDAO) dao).endTransaction();
}
instream = new ByteArrayInputStream(entry.getBytes());
catch(RuntimeException re)
{
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
re.getMessage(),
"Problems Reading From the Database ",
JOptionPane.ERROR_MESSAGE);
re.printStackTrace();
}
JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
sqlExp.getMessage(),
"Problems Reading From the Database ",
sqlExp.printStackTrace();
}
return null;
}
*
* Called (by DatabaseEntrySource) to retrieve all the documents for each
* entry created.
*
*/
public DatabaseDocument[] getGffDocuments(String location, String id,
String schema)
{
if(gff_buffer[i].size() > 0)
nentries++;
}
DatabaseDocument[] new_docs = new DatabaseDocument[nentries];
nentries = 0;
String name;
if(i >= types.length)
name = "other";
else
name = types[i];
new_docs[nentries] = new DatabaseDocument(location, pfield, id, schema,
gff_buffer[i], name);
* Create an array of GFF lines.
* @param dao the data access object
* @param parentFeatureID the parent identifier for the features to
* extract
* @return the <code>ByteBuffer</code> array of GFF lines
private ByteBuffer[] getGff(GmodDAO dao, String parentFeatureID)
Feature srcFeature = new Feature();
srcFeature.setFeatureId(srcfeature_id);
featureloc.setFeatureBySrcFeatureId(srcFeature);
//featureloc.setSrcfeature_id(srcfeature_id);
List featList = dao.getFeaturesByLocatedOnFeature(parent);
ByteBuffer[] buffers = new ByteBuffer[types.length + 1];
for(int i = 0; i < buffers.length; i++)
final Feature parentFeature = dao.getFeatureById(srcfeature_id);
Hashtable id_store = new Hashtable(feature_size);
for(int i = 0; i < feature_size; i++)
{
dao.getFeatureDbXRefsByFeatureUniquename(null));
Hashtable synonym = getAllFeatureSynonyms(dao, null);
Hashtable featureCvTerms = getFeatureCvTermsByFeature(dao);
if(featureCvTerms != null)
System.out.println("\n\n"+featureCvTerms.size()+"\n\n");
long type_id = feat.getCvTerm().getCvTermId();
String typeName = getCvtermName(type_id, dao);
{
if(types[j].equals(typeName))
this_buff = buffers[j];
}
dbxrefs, synonym, featureCvTerms,
if( i%10 == 0 || i == feature_size-1)
progress_listener.progressMade("Read from database: " +
* Get a <code>Hashtable</code> of feature_id keys and their corresponding
* feature_synonym
private Hashtable getAllFeatureSynonyms(final GmodDAO dao,
List list = dao.getFeatureSynonymsByFeatureUniquename(uniquename);
Hashtable synonym = new Hashtable();
Integer feature_id;
if(synonym.containsKey(feature_id))
value = (Vector)synonym.get(feature_id);
else
value = new Vector();
value.add(alias);
synonym.put(feature_id, value);
}
return synonym;
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
}
private Hashtable getFeatureCvTermsByFeature(final GmodDAO dao)
{
List list = dao.getFeatureCvTermsByFeature(null);
Hashtable featureCvTerms = new Hashtable();
Integer feature_id;
List value;
FeatureCvTerm feature_cvterm;
for(int i=0; i<list.size(); i++)
{
feature_cvterm = (FeatureCvTerm)list.get(i);
feature_id = new Integer(feature_cvterm.getFeature().getFeatureId());
if(featureCvTerms.containsKey(feature_id))
value = (Vector)featureCvTerms.get(feature_id);
else
value = new Vector();
value.add(feature_cvterm);
featureCvTerms.put(feature_id, value);
}
return featureCvTerms;
}
/**
* Use by the gene editor to retrieve the gene and related
* features
* @param search_gene gene uniquename
* @param schema_search schema list to search
* @param dao data access method
* @return GFF byte buffer
* @throws SQLException
* @throws ReadFormatException
private ByteBuffer getGeneFeature(final String search_gene,
final List schema_search,
throws SQLException, ReadFormatException, ConnectException
{
Hashtable id_store = new Hashtable();
reset((String)getLocation(), (String)schema_search.get(0));
dao = getDAO();
Feature feature =
(Feature)dao.getFeatureByUniqueName(search_gene);
ChadoCanonicalGene chado_gene = new ChadoCanonicalGene();
id_store.put(Integer.toString(feature.getFeatureId()), feature.getUniqueName());
List featurelocs = new Vector(feature.getFeatureLocsForFeatureId());
FeatureLoc featureloc = (FeatureLoc) featurelocs.get(0);
int src_id = featureloc.getFeatureBySrcFeatureId().getFeatureId();
parent = dao.getFeatureById(src_id); //.getLazyFeature(parent);
chado_gene.setSeqlen(parent.getSeqLen().intValue());
ByteBuffer buff = new ByteBuffer();
chadoToGFF(feature, null, null, null, null, null, dao,
featureloc, buff);
// get children of gene
List relations = new Vector(feature.getFeatureRelationshipsForObjectId());
for(int i = 0; i < relations.size(); i++)
{
int id = ((FeatureRelationship) relations.get(i)).getFeatureBySubjectId().getFeatureId();
Feature transcript =
(Feature)dao.getFeatureById(id); //.getLazyFeature(transcript);
FeatureLoc loc = getFeatureLoc(new Vector(
transcript.getFeatureLocsForFeatureId()), src_id);
chadoToGFF(transcript, feature.getUniqueName(), null,
null, null, id_store, dao, loc, buff);
// get children of transcript - exons and pp
List transcipt_relations = new Vector(
transcript.getFeatureRelationshipsForObjectId());
for(int j = 0; j < transcipt_relations.size(); j++)
{
id = ((FeatureRelationship) transcipt_relations.get(j)).getFeatureBySubjectId().getFeatureId();
//Feature child = new Feature();
//child.setId(((FeatureRelationship) transcipt_relations.get(j))
Feature child =
(Feature)dao.getFeatureById(id); //dao.getLazyFeature(child);
id_store.put(Integer.toString(child.getFeatureId()), child.getUniqueName());
new Vector(child.getFeatureLocsForFeatureId()),src_id);
chadoToGFF(child, transcript.getUniqueName(), null,
null, null, id_store, dao, loc, buff);
}
}
return buff;
}
/**
* Convert the chado feature into a GFF line
* @param feat Chado feature
* @param parentFeature parent of this feature
* @param dbxrefs hashtable containing dbxrefs
* @param synonym hashtable containing synonynms
* @param id_store id store for looking up parent names
* @param dao chado data access
* @param featureloc feature location for this chado feature
private static void chadoToGFF(final Feature feat,
final String parentFeature,
final Hashtable dbxrefs,
final Hashtable synonym,
final Hashtable featureCvTerms,
final Hashtable id_store,
final ByteBuffer this_buff)
final int fmin = featureloc.getFmin().intValue() + 1;
final int fmax = featureloc.getFmax().intValue();
final long type_id = feat.getCvTerm().getCvTermId();
final Short strand = featureloc.getStrand();
final Integer phase = featureloc.getPhase();
final String name = feat.getUniqueName();
final String typeName = getCvtermName(type_id, dao);
final Integer feature_id = new Integer(feat.getFeatureId());
final String timelastmodified = Long.toString(feat.getTimeLastModified().getTime());
String parent_id = null;
String parent_relationship = null;
/* if(feat.getFeatureRelationship() != null)
FeatureRelationship feat_relationship = feat.getFeatureRelationship();
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
long parent_type_id = feat_relationship.getCvTerm().getCvTermId();
parent_relationship = feat_relationship.getCvTerm().getName();
if(parent_relationship == null)
parent_relationship = getCvtermName(parent_type_id, dao);
}
else */
if(feat.getFeatureRelationshipsForSubjectId() != null)
List relations = new Vector(feat.getFeatureRelationshipsForSubjectId());
FeatureRelationship feat_relationship =
(FeatureRelationship)relations.get(i);
parent_id = Integer.toString(feat_relationship.getFeatureByObjectId().getFeatureId());
if( feat_relationship.getCvTerm().getName() == null )
{
long parent_type_id = feat_relationship.getCvTerm().getCvTermId();
parent_relationship = getCvtermName(parent_type_id, dao);
}
else
parent_relationship = feat_relationship.getCvTerm().getName();
}
}
if(parent_id != null && id_store != null && id_store.containsKey(parent_id))
parent_id = (String)id_store.get(parent_id);
// make gff format
Vector dbxref = null;
// append dbxrefs
if(dbxrefs != null &&
dbxrefs.containsKey(feature_id))
dbxref = (Vector)dbxrefs.get(feature_id);
if(((String)dbxref.get(j)).startsWith("GFF_source:"))
gff_source = ((String)dbxref.get(j)).substring(11);
dbxref.removeElementAt(j);
this_buff.append(parentFeature + "\t"); // seqid
if(gff_source != null)
this_buff.append(gff_source+"\t"); // source
else
this_buff.append("chado\t");
this_buff.append(typeName + "\t"); // type
this_buff.append(fmin + "\t"); // start
this_buff.append(fmax + "\t"); // end
this_buff.append(".\t"); // score
if(strand.equals( new Short((short)-1)) ) // strand
else if(strand.equals( new Short((short)1)) )
this_buff.append("+\t");
else
this_buff.append(".\t");
this_buff.append(".\t"); // phase
else
this_buff.append(phase+"\t");
this_buff.append("ID=" + name + ";");
if(parent_id != null && !parent_id.equals("0"))
{
if(parent_relationship.equals("derives_from"))
this_buff.append("Derives_from=" + parent_id + ";");
this_buff.append("Parent=" + parent_id + ";");
}
this_buff.append("timelastmodified=" + timelastmodified + ";");
// this is the chado feature_relationship.rank used
// to order features e.g. exons
if(rank > -1)
this_buff.append("feature_relationship_rank="+rank+";");
//this_buff.append("feature_id="+feature_id+";");
List featureprops = (List)feat.getFeatureProps();
for(int j=0; j<featureprops.size(); j++)
FeatureProp featprop = (FeatureProp)featureprops.get(j);
String qualifier_name = getCvtermName(featprop.getCvTerm().getCvTermId(), dao);
if(featprop.getValue() != null)
this_buff.append(qualifier_name+ "=" +
GFFStreamFeature.encode(featprop.getValue())+";");
// append dbxrefs
if(dbxref != null && dbxref.size() > 0)
{
this_buff.append("Dbxref=");
for(int j=0; j<dbxref.size(); j++)
this_buff.append((String)dbxref.get(j));
if(j<dbxref.size()-1)
this_buff.append(",");
this_buff.append(";");
}
// append synonyms
if(synonym != null &&
synonym.containsKey(feature_id))
Vector v_synonyms = (Vector)synonym.get(feature_id);
this_buff.append( getCvtermName(alias.getSynonym().getCvTerm().getCvTermId(), dao) + "=" );
//this_buff.append(alias.getSynonym().getCvterm().getName()+"=");
this_buff.append(alias.getSynonym().getName());
if(j<v_synonyms.size()-1)
this_buff.append(";");
if(featureCvTerms != null &&
featureCvTerms.containsKey(feature_id))
{
FeatureCvTerm feature_cvterm;
Vector v_feature_cvterms = (Vector)featureCvTerms.get(feature_id);
for(int j=0; j<v_feature_cvterms.size(); j++)
{
feature_cvterm = (FeatureCvTerm)v_feature_cvterms.get(j);
DbXRef dbXRef = feature_cvterm.getCvTerm().getDbXRef();
this_buff.append("GO=");
if(feature_cvterm.isNot())
this_buff.append("qualifier=NOT;");
this_buff.append(dbXRef.getDb().getName()+":"+dbXRef.getAccession()+";");
}
//System.out.println(new String(this_buff.getBytes()));
}
/**
* Look up the cvterm_id for a controlled vocabulary name.
* @param name
* @return
*/
Enumeration enum_cvterm = cvterms.keys();
while(enum_cvterm.hasMoreElements())
{
Long key = (Long)enum_cvterm.nextElement();
if(name.equals( ((CvTerm)cvterms.get(key)).getName() ))
* Look up a cvterm name from the collection of cvterms.
* @param id a cvterm_id
* @return the cvterm name
private static String getCvtermName(long id, GmodDAO dao)
return ((CvTerm)cvterms.get(new Long(id))).getName();
* @param dao the data access object
* @return the cvterm <code>Hashtable</code>
private static Hashtable getCvterm(GmodDAO dao)
cvterms = new Hashtable();
List cvterm_list = dao.getCvTerms();
Iterator it = cvterm_list.iterator();
CvTerm cvterm = (CvTerm)it.next();
cvterms.put(new Long(cvterm.getCvTermId()), cvterm);
catch(RuntimeException sqle)
System.err.println("SQLException retrieving CvTerms");
/**
* Look up synonym type names e.g. synonym, systematic_id.
* @return the synonym tag names
*/
public static String[] getSynonymTypeNames(String cv_name)
{
Vector synonym_names = new Vector();
Enumeration cvterm_enum = cvterms.elements();
while(cvterm_enum.hasMoreElements())
{
CvTerm cvterm = (CvTerm)cvterm_enum.nextElement();
if(cvterm.getCv().getName().equals(cv_name))
synonym_names.add(cvterm.getName());
}
return (String[])synonym_names.toArray(
new String[synonym_names.size()]);
}
/**
* Get the sequence for a feature.
* @param dao the data access object
* @param buff the buffer to add the sequence to
* @return the resulting buffer
* @throws java.sql.SQLException
*/
private ByteBuffer getChadoSequence(GmodDAO dao, ByteBuffer buff)
Feature feature = dao.getFeatureById(Integer.parseInt(feature_id));
/**
* Get the <code>List</code> of available schemas.
* @return the <code>List</code> of available schemas
*/
public List getSchema()
{
return schema_list;
}
* Create a hashtable of the available entries with residues.
* @return a <code>Hashtable</code> of the <code>String</code>
* representation (schema-type-feature_name) and the
* corresponding feature_id
* @throws ConnectException
* @throws java.sql.SQLException
*/