/* DatabaseDocument.java
 *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
package uk.ac.sanger.artemis.util;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import javax.swing.JOptionPane;
import javax.swing.JPasswordField;

import uk.ac.sanger.artemis.io.ChadoCanonicalGene;
import uk.ac.sanger.artemis.io.GFFStreamFeature;
import uk.ac.sanger.artemis.io.InvalidRelationException;
import uk.ac.sanger.artemis.io.ReadFormatException;

import uk.ac.sanger.artemis.chado.*;
import uk.ac.sanger.artemis.components.DatabaseEntrySource;
/**
 * Objects of this class are Documents created from a relational database.
 */
public class DatabaseDocument extends Document
{
  /** database schema */
  private String schema = "public";

  /** source feature_id */
  private String feature_id = "1";

  private JPasswordField pfield;
  private ByteBuffer gff_buff;
  private ByteBuffer[] gff_buffer;
  private boolean splitGFFEntry;
  private String name;
  private List schema_list;

  /** cvterm_id to cvterm name cache */
  private static Hashtable cvterm;

  /** true if iBatis is used for database access */
  private static boolean iBatis = false;

  private InputStreamProgressListener progress_listener;

  /** JDBC DAO */
  private JdbcDAO jdbcDAO = null;

  /** iBatis DAO */
  private IBatisDAO connIB = null;

  private String[] types = { "exon", "gene", "CDS", "transcript" };

  private boolean gene_builder;
  /**
   * Create a new Document from a database.
   *
   * @param location
   *          This should be a URL string giving:
   *          jdbc:postgresql://host:port/database_name?user=username
   */
  public DatabaseDocument(String location, JPasswordField pfield)
  {
    super(location);
    this.pfield = pfield;

    if(System.getProperty("ibatis") != null)
    {
      iBatis = true;
      System.setProperty("chado", location);
    }
  }
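
  // Illustrative usage (not from the original source; host, database and
  // user values are placeholders):
  //
  //   JPasswordField pfield = new JPasswordField("secret");
  //   DatabaseDocument doc = new DatabaseDocument(
  //       "jdbc:postgresql://localhost:5432/chado?user=me", pfield);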
  /**
   * Create a new Document from a database.
   *
   * @param location
   *          This should be a URL string giving:
   *          jdbc:postgresql://host:port/database_name?user=username
   * @param feature_id
   *          ID of a feature to be extracted.
   */
  public DatabaseDocument(String location, JPasswordField pfield,
                          String feature_id, String schema)
  {
    super(location);
    this.pfield = pfield;
    this.feature_id = feature_id;
    this.schema = schema;

    if(System.getProperty("ibatis") != null)
    {
      iBatis = true;
      System.setProperty("chado", location);
    }
  }
  /**
   * Create a new Document from a database.
   *
   * @param location
   *          This should be a URL string giving:
   *          jdbc:postgresql://host:port/database_name?user=username
   * @param feature_id
   *          ID of a feature to be extracted.
   * @param splitGFFEntry
   *          split into separate entries based on feature types.
   * @param progress_listener
   *          input stream progress listener
   */
  public DatabaseDocument(String location, JPasswordField pfield,
                          String feature_id, String schema,
                          boolean splitGFFEntry,
                          InputStreamProgressListener progress_listener)
  {
    super(location);
    this.pfield = pfield;
    this.feature_id = feature_id;
    this.splitGFFEntry = splitGFFEntry;
    this.progress_listener = progress_listener;
    reset(location, schema);

    if(System.getProperty("ibatis") != null)
    {
      iBatis = true;
      System.setProperty("chado", location);
    }
  }
/**
* Used by the gene builder to read a database entry
* for a single gene.
* @param location
* @param pfield
* @param feature_id
* @param schema
* @param gene_builder
*/
public DatabaseDocument(String location, JPasswordField pfield,
String feature_id, String schema, boolean gene_builder)
{
super(location);
this.pfield = pfield;
this.feature_id = feature_id;
this.schema = schema;
this.gene_builder = gene_builder;
if(System.getProperty("ibatis") != null)
{
iBatis = true;
System.setProperty("chado", location);
}
}
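
  // Illustrative usage of the gene-builder constructor (gene name and
  // schema are placeholders): fetch a single gene and its child features
  // as GFF.
  //
  //   DatabaseDocument doc = new DatabaseDocument(location, pfield,
  //                                               "abc1", "common", true);
  //   InputStream in = doc.getInputStream();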
  /**
   * Create a new Document from a database.
   * @param location the database location
   * @param pfield the password field
   * @param feature_id ID of a feature to be extracted
   * @param schema the database schema
   * @param gff_buff GFF lines for this entry
   * @param name the name of this entry
   */
  public DatabaseDocument(String location, JPasswordField pfield,
                          String feature_id, String schema,
                          ByteBuffer gff_buff, String name)
  {
    super(location);
    this.pfield = pfield;
    this.feature_id = feature_id;
    this.schema = schema;
    this.gff_buff = gff_buff;
    this.name = name;

    if(System.getProperty("ibatis") != null)
    {
      iBatis = true;
      System.setProperty("chado", location);
    }
  }
/**
* Reset the schema.
* @param location
* @param schema
*/
private void reset(String location, String schema)
{
this.schema = schema;
if(!location.endsWith("="+schema))
{
int index = location.lastIndexOf('=');
setLocation(location.substring(0,index+1) + schema);
connIB = null;
jdbcDAO = null;
System.setProperty("chado", (String)getLocation());
}
}
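
  // Illustrative note: reset() assumes the schema is the value of the
  // last '='-delimited parameter in the location string; it swaps in the
  // new schema and nulls the cached DAOs so the next getDAO() call
  // reconnects against the updated location.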
  /**
   * Append a String to the Document location.
   * @param name the name to append
   */
  public Document append(String name) throws IOException
  {
    return new DatabaseDocument( ((String)getLocation()) + name, pfield);
  }

  /**
   * Return the name of this Document (the last element of the Document
   * location).
   */
  public String getName()
  {
    if(name == null)
    {
      int ind = ((String) getLocation()).indexOf("?");
      String name = ((String) getLocation()).substring(0, ind);
      ind = name.lastIndexOf("/");
      return name.substring(ind + 1);
    }
    return name;
  }
/**
* Set the name of this document.
*/
public void setName(String name)
{
this.name = name;
}
  /**
   * Create a new DatabaseDocument with the same location, password,
   * feature_id and schema as this document.
   */
  public DatabaseDocument createDatabaseDocument()
  {
    return new DatabaseDocument( (String)getLocation(), pfield,
                                 feature_id, schema );
  }
  /**
   * Return true if and only if the Document referred to by this object
   * exists and is readable. Always returns true.
   */
  public boolean readable()
  {
    return true;
  }
  /**
   * Return true if and only if the Document referred to by this object
   * exists and can be written to. Always returns false.
   */
  public boolean writable()
  {
    return false;
  }
  /**
   * Create a new InputStream object from this Document. The contents of the
   * Document can be read from the InputStream.
   *
   * @exception IOException
   *              Thrown if the Document can't be read from (for example if
   *              it doesn't exist).
   */
  public InputStream getInputStream() throws IOException
  {
    ByteArrayInputStream instream;

    if(gff_buff != null)
    {
      instream = new ByteArrayInputStream(gff_buff.getBytes());
      return instream;
    }

    try
    {
      ChadoDAO dao = getDAO();
      ByteBuffer entry = new ByteBuffer();

      try
      {
        if(dao instanceof IBatisDAO)
          ((IBatisDAO) dao).startTransaction();

        // if creating a gene builder
        if(gene_builder)
        {
          List schemaList = new Vector();
          schemaList.add(schema);
          return new ByteArrayInputStream(getGeneFeature(feature_id,
                                            schemaList, dao).getBytes());
        }

        gff_buffer = getGff(dao, feature_id);

        if(splitGFFEntry)
        {
          if(gff_buffer[0].size() > 0)
            entry.append(gff_buffer[0]);

          getChadoSequence(dao, entry);
        }
        else
        {
          for(int i = 0; i < gff_buffer.length; i++)
          {
            if(gff_buffer[i].size() > 0)
              entry.append(gff_buffer[i]);
          }

          getChadoSequence(dao, entry);
        }

        if(dao instanceof IBatisDAO)
          ((IBatisDAO) dao).commitTransaction();
      }
      finally
      {
        if(dao instanceof IBatisDAO)
          ((IBatisDAO) dao).endTransaction();
      }

      instream = new ByteArrayInputStream(entry.getBytes());
      return instream;
    }
    catch(java.sql.SQLException sqlExp)
    {
      JOptionPane.showMessageDialog(null, "Problems Reading...\n" +
                                    sqlExp.getMessage(),
                                    "Problems Reading From the Database ",
                                    JOptionPane.ERROR_MESSAGE);
      sqlExp.printStackTrace();
    }

    return null;
  }
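
  // Illustrative sketch of the stream contents (coordinates and names are
  // made up): GFF lines followed by the sequence appended by
  // getChadoSequence(), e.g.
  //
  //   chr1  chado  gene  1201  2500  .  +  .  ID=abc1;timelastmodified=...
  //   ##FASTA
  //   >chr1
  //   atgca...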
  /**
   * Called (by DatabaseEntrySource) to retrieve all the documents for each
   * entry created.
   */
  public DatabaseDocument[] getGffDocuments(String location, String id,
                                            String schema)
  {
    int nentries = 0;
    for(int i = 1; i < gff_buffer.length; i++)
    {
      if(gff_buffer[i].size() > 0)
        nentries++;
    }

    DatabaseDocument[] new_docs = new DatabaseDocument[nentries];
    nentries = 0;

    for(int i = 1; i < gff_buffer.length; i++)
    {
      if(gff_buffer[i].size() == 0)
        continue;

      String name;
      if(i >= types.length)
        name = "other";
      else
        name = types[i];

      new_docs[nentries] = new DatabaseDocument(location, pfield, id, schema,
                                                gff_buffer[i], name);
      nentries++;
    }

    return new_docs;
  }
  /**
   * Create an array of GFF lines.
   * @param dao the data access object
   * @param parentFeatureID the parent identifier for the features to
   *        extract
   * @return the <code>ByteBuffer</code> array of GFF lines
   * @throws java.sql.SQLException
   */
  private ByteBuffer[] getGff(ChadoDAO dao, String parentFeatureID)
      throws java.sql.SQLException
  {
    final int srcfeature_id = Integer.parseInt(parentFeatureID);

    // build srcfeature object
    ChadoFeatureLoc featureloc = new ChadoFeatureLoc();
    featureloc.setSrcfeature_id(srcfeature_id);

    ChadoFeature feature = new ChadoFeature();
    feature.setFeatureloc(featureloc);

    List featList = dao.getFeature(feature);

    ByteBuffer[] buffers = new ByteBuffer[types.length + 1];
    for(int i = 0; i < buffers.length; i++)
      buffers[i] = new ByteBuffer();

    final String parentFeature = dao.getFeatureName(srcfeature_id);
    ByteBuffer this_buff;

    int feature_size = featList.size();
    Hashtable id_store = new Hashtable(feature_size);

    // build feature name store
    for(int i = 0; i < feature_size; i++)
    {
      ChadoFeature feat = (ChadoFeature)featList.get(i);
      String name = feat.getUniquename();
      String feature_id = Integer.toString(feat.getId());
      id_store.put(feature_id, name);
    }

    Hashtable dbxrefs = dao.getDbxref(null);
    Hashtable synonym = dao.getAlias(null);

    for(int i = 0; i < feature_size; i++)
    {
      ChadoFeature feat = (ChadoFeature)featList.get(i);
      String typeName = getCvtermName(feat.getCvterm().getCvtermId(), dao);

      // pick the buffer for this feature type, defaulting to "other"
      this_buff = buffers[types.length];
      for(int j = 0; j < types.length; j++)
      {
        if(types[j].equals(typeName))
          this_buff = buffers[j];
      }

      chadoToGFF(feat, parentFeature,
                 dbxrefs, synonym,
                 id_store, dao,
                 feat.getFeatureloc(), this_buff);

      if(progress_listener != null &&
         (i%10 == 0 || i == feature_size-1))
        progress_listener.progressMade("Read from database: " +
                                       feat.getUniquename());
    }

    return buffers;
  }
  /**
   * Used by the gene editor to retrieve the gene and related
   * features.
   * @param search_gene gene uniquename
   * @param schema_search schema list to search
   * @param dao data access method
   * @return GFF byte buffer
   * @throws SQLException
   * @throws ReadFormatException
   */
  private ByteBuffer getGeneFeature(final String search_gene,
                                    final List schema_search,
                                    ChadoDAO dao)
      throws SQLException, ReadFormatException, ConnectException
{
Hashtable id_store = new Hashtable();
ChadoFeature feature = new ChadoFeature();
feature.setUniquename(search_gene);
reset((String)getLocation(), (String)schema_search.get(0));
dao = getDAO();
List featureList = dao.getLazyFeature(feature);
ChadoCanonicalGene chado_gene = new ChadoCanonicalGene();
if(featureList.size() > 1)
System.err.println("More than one feature found!");
feature = (ChadoFeature) featureList.get(0);
id_store.put(Integer.toString(feature.getId()), feature.getUniquename());
List featurelocs = feature.getFeaturelocsForFeatureId();
ChadoFeatureLoc featureloc = (ChadoFeatureLoc) featurelocs.get(0);
int src = featureloc.getSrcfeature_id();
ChadoFeature parent = new ChadoFeature();
parent.setId(src);
List parentList = dao.getLazyFeature(parent);
parent = (ChadoFeature) parentList.get(0);
chado_gene.setSeqlen(parent.getLength());
chado_gene.setSrcfeature_id(src);
ByteBuffer buff = new ByteBuffer();
chadoToGFF(feature, null, null, null, null, dao,
featureloc, buff);
// get children of gene
List relations = feature.getFeatureRelationshipsForObjectId();
for(int i = 0; i < relations.size(); i++)
{
ChadoFeature transcript = new ChadoFeature();
transcript.setId(((ChadoFeatureRelationship) relations.get(i))
.getSubject_id());
featureList = dao.getLazyFeature(transcript);
transcript = (ChadoFeature) featureList.get(0);
id_store.put(Integer.toString(transcript.getId()), transcript
.getUniquename());
ChadoFeatureLoc loc = ChadoFeature.getFeatureLoc(transcript
.getFeaturelocsForFeatureId(), src);
chadoToGFF(transcript, feature.getUniquename(), null,
null, id_store, dao, loc, buff);
      // get children of transcript - exons and polypeptide
      List transcript_relations = transcript.getFeatureRelationshipsForObjectId();

      for(int j = 0; j < transcript_relations.size(); j++)
      {
        ChadoFeature child = new ChadoFeature();
        child.setId(((ChadoFeatureRelationship) transcript_relations.get(j))
            .getSubject_id());
featureList = dao.getLazyFeature(child);
child = (ChadoFeature) featureList.get(0);
id_store.put(Integer.toString(child.getId()), child.getUniquename());
loc = ChadoFeature.getFeatureLoc(child.getFeaturelocsForFeatureId(),src);
chadoToGFF(child, transcript.getUniquename(), null,
null, id_store, dao, loc, buff);
}
}
return buff;
}
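
  // Illustrative note: for a gene "abc1" (a placeholder name) the walk
  // above emits the gene line, then one line per transcript, then one
  // line per transcript child, giving GFF attributes such as
  //
  //   ID=abc1
  //   ID=abc1.1;Parent=abc1
  //   ID=abc1.1:exon:1;Parent=abc1.1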
  /**
   * Convert the chado feature into a GFF line
   * @param feat Chado feature
   * @param parentFeature parent of this feature
   * @param dbxrefs hashtable containing dbxrefs
   * @param synonym hashtable containing synonyms
   * @param id_store id store for looking up parent names
   * @param dao chado data access
   * @param featureloc feature location for this chado feature
   * @param this_buff buffer to append the GFF line to
   */
  private static void chadoToGFF(final ChadoFeature feat,
                                 final String parentFeature,
                                 final Hashtable dbxrefs,
                                 final Hashtable synonym,
                                 final Hashtable id_store,
                                 final ChadoDAO dao,
                                 final ChadoFeatureLoc featureloc,
                                 final ByteBuffer this_buff)
  {
    int fmin    = featureloc.getFmin() + 1;
    int fmax    = featureloc.getFmax();
    int strand  = featureloc.getStrand();
    int phase   = featureloc.getPhase();
    long type_id = feat.getCvterm().getCvtermId();
    int rank    = -1;

    String name     = feat.getUniquename();
    String typeName = getCvtermName(type_id, dao);

    String timelastmodified = Long.toString(feat.getTimelastmodified().getTime());
    Integer feature_id = new Integer(feat.getId());

    String parent_id = null;
    String parent_relationship = null;
    if(feat.getFeature_relationship() != null)
    {
      ChadoFeatureRelationship feat_relationship = feat.getFeature_relationship();
      parent_id = Integer.toString(feat_relationship.getObject_id());
      long parent_type_id = feat_relationship.getCvterm().getCvtermId();

      parent_relationship = feat_relationship.getCvterm().getName();
      rank = feat_relationship.getRank();   // feature_relationship.rank

      if(parent_relationship == null)
        parent_relationship = getCvtermName(parent_type_id, dao);
    }
else if(feat.getFeatureRelationshipsForSubjectId() != null)
{
List relations = feat.getFeatureRelationshipsForSubjectId();
for(int i=0; i<relations.size(); i++)
{
ChadoFeatureRelationship feat_relationship =
(ChadoFeatureRelationship)relations.get(i);
parent_id = Integer.toString(feat_relationship.getObject_id());
System.out.println("HERE "+i+" "+feat_relationship.getCvterm().getName()+ " "+
feat_relationship.getObject_id()+" "+feat_relationship.getSubject_id()+ " parent_id="+ parent_id);
parent_relationship = feat_relationship.getCvterm().getName();
}
}
if(parent_id != null && id_store != null && id_store.containsKey(parent_id))
parent_id = (String)id_store.get(parent_id);
    // make gff format

    Vector dbxref = null;
    String gff_source = null;

    // append dbxrefs
    if(dbxrefs != null &&
       dbxrefs.containsKey(feature_id))
    {
      dbxref = (Vector)dbxrefs.get(feature_id);
      for(int j = 0; j < dbxref.size(); j++)
      {
        if(((String)dbxref.get(j)).startsWith("GFF_source:"))
        {
          gff_source = ((String)dbxref.get(j)).substring(11);
          dbxref.removeElementAt(j);
        }
      }
    }
this_buff.append(parentFeature + "\t"); // seqid
if(gff_source != null)
this_buff.append(gff_source+"\t"); // source
else
this_buff.append("chado\t");
this_buff.append(typeName + "\t"); // type
this_buff.append(fmin + "\t"); // start
this_buff.append(fmax + "\t"); // end
this_buff.append(".\t"); // score
if(strand == -1) // strand
this_buff.append("-\t");
else if(strand == 1)
this_buff.append("+\t");
else
this_buff.append(".\t");
if(phase > 3)
this_buff.append(".\t"); // phase
else
this_buff.append(phase+"\t");
this_buff.append("ID=" + name + ";");
    if(parent_id != null && !parent_id.equals("0"))
    {
      if(parent_relationship.equals("derives_from"))
        this_buff.append("Derives_from=" + parent_id + ";");
      else
        this_buff.append("Parent=" + parent_id + ";");
    }
this_buff.append("timelastmodified=" + timelastmodified + ";");
// this is the chado feature_relationship.rank used
// to order features e.g. exons
if(rank > -1)
this_buff.append("feature_relationship_rank="+rank+";");
//this_buff.append("feature_id="+feature_id+";");
    // attributes
    Hashtable qualifiers = feat.getQualifiers();

    if(qualifiers != null && qualifiers.size() > 0)
    {
      Enumeration e_qualifiers = qualifiers.keys();
      while(e_qualifiers.hasMoreElements())
      {
        Long qualifier_type_id = (Long)e_qualifiers.nextElement();
        String qualifier_name = getCvtermName(qualifier_type_id.longValue(), dao);
        if(qualifier_name == null)
          continue;

        Vector qualifier_value = (Vector)qualifiers.get(qualifier_type_id);
        for(int j=0; j<qualifier_value.size(); j++)
        {
          ChadoFeatureProp featprop = (ChadoFeatureProp)qualifier_value.get(j);

          if(featprop.getValue() != null)
            this_buff.append(qualifier_name+ "=" +
                             GFFStreamFeature.encode(featprop.getValue())+";");
        }
      }
    }
    // append dbxrefs
    if(dbxref != null && dbxref.size() > 0)
    {
      this_buff.append("Dbxref=");
      for(int j=0; j<dbxref.size(); j++)
      {
        this_buff.append((String)dbxref.get(j));
        if(j<dbxref.size()-1)
          this_buff.append(",");
      }
      this_buff.append(";");
    }
    // append synonyms
    if(synonym != null &&
       synonym.containsKey(feature_id))
    {
      ChadoFeatureSynonym alias;
      Vector v_synonyms = (Vector)synonym.get(feature_id);
      for(int j=0; j<v_synonyms.size(); j++)
      {
        alias = (ChadoFeatureSynonym)v_synonyms.get(j);
        this_buff.append( getCvtermName(alias.getSynonym().getCvterm()
            .getCvtermId(), dao) + "=" );
        //this_buff.append(alias.getSynonym().getCvterm().getName()+"=");
        this_buff.append(alias.getSynonym().getName());

        if(j<v_synonyms.size()-1)
          this_buff.append(";");
      }
    }

    this_buff.append("\n");
  }
  /**
   * Look up the cvterm_id for a controlled vocabulary name.
   * @param name the cvterm name
   * @return the cvterm_id, or null if not found
   */
  public static Long getCvtermID(String name)
  {
    Enumeration enum_cvterm = cvterm.keys();
    while(enum_cvterm.hasMoreElements())
    {
      Long key = (Long)enum_cvterm.nextElement();
      if(name.equals(cvterm.get(key)))
        return key;
    }
    return null;
  }

  /**
   * Look up a cvterm name from the collection of cvterms.
   * @param id a cvterm_id
   * @return the cvterm name
   */
  private static String getCvtermName(long id, ChadoDAO dao)
  {
    if(cvterm == null)
      getCvterm(dao);

    return (String)cvterm.get(new Long(id));
  }

  /**
   * Build the collection of cvterms.
   * @param dao the data access object
   * @return the cvterm <code>Hashtable</code>
   */
  private static Hashtable getCvterm(ChadoDAO dao)
  {
    cvterm = new Hashtable();

    try
    {
      // assumes the DAO exposes getCvterm() returning ChadoCvterm objects
      List cvterm_list = dao.getCvterm();
      Iterator it = cvterm_list.iterator();

      while(it.hasNext())
      {
        ChadoCvterm cv = (ChadoCvterm)it.next();
        cvterm.put(new Long(cv.getCvtermId()), cv.getName());
      }
    }
    catch(SQLException sqle)
    {
      System.err.println("SQLException retrieving CvTerms");
    }

    return cvterm;
  }
  /**
   * Get the sequence for a feature.
   * @param dao the data access object
   * @param buff the buffer to add the sequence to
   * @return the resulting buffer
   * @throws java.sql.SQLException
   */
  private ByteBuffer getChadoSequence(ChadoDAO dao, ByteBuffer buff)
      throws java.sql.SQLException
  {
    ChadoFeature feature = dao.getSequence(Integer.parseInt(feature_id));

    // append the sequence as a FASTA record after the GFF lines
    buff.append("##FASTA\n>");
    buff.append(feature.getUniquename());
    buff.append("\n");
    buff.append(feature.getResidues());

    return buff;
  }
/**
* Get the <code>List</code> of available schemas.
* @return the <code>List</code> of available schemas
*/
public List getSchema()
{
return schema_list;
}
  /**
   * Create a hashtable of the available entries with residues.
   * @return a <code>Hashtable</code> of the <code>String</code>
   *          representation (schema-type-feature_name) and the
   *          corresponding feature_id
   * @throws ConnectException
   * @throws java.sql.SQLException
   */
  public Hashtable getDatabaseEntries()
      throws ConnectException, java.sql.SQLException
  {
    Hashtable db = new Hashtable();
ChadoDAO dao = null;
try
{
dao = getDAO();
}
catch(ConnectException exp)
{
JOptionPane.showMessageDialog(null, "Connection Problems...\n"+
exp.getMessage(),
"Connection Error",
JOptionPane.ERROR_MESSAGE);
throw exp;
}
catch(java.sql.SQLException sqlExp)
{
JOptionPane.showMessageDialog(null, "SQL Problems...\n"+
sqlExp.getMessage(),
"SQL Error",
JOptionPane.ERROR_MESSAGE);
throw sqlExp;
}
    try
    {
      if(dao instanceof IBatisDAO)
        ((IBatisDAO) dao).startTransaction();

      schema_list = dao.getSchema();
      Iterator it = schema_list.iterator();

      while(it.hasNext())
      {
        String schema = (String)it.next();

        List list = dao.getResidueType(schema);
        if(list.size() == 0) // no residues for this organism
          continue;

        List list_residue_features = dao.getResidueFeatures(list, schema);
        Iterator it_residue_features = list_residue_features.iterator();
        while(it_residue_features.hasNext())
        {
          ChadoFeature feature = (ChadoFeature)it_residue_features.next();
          String typeName = getCvtermName(feature.getCvterm().getCvtermId(),
                                          getDAO());

          db.put(schema + " - " + typeName + " - " + feature.getUniquename(),
                 Integer.toString(feature.getId()));
        }
      }

      if(dao instanceof IBatisDAO)
        ((IBatisDAO) dao).commitTransaction();
    }
    catch(java.sql.SQLException sqlExp)
    {
      JOptionPane.showMessageDialog(null, "SQL Problems...\n"+
                                    sqlExp.getMessage(),
                                    "SQL Error",
                                    JOptionPane.ERROR_MESSAGE);
      throw sqlExp;
    }
    finally
    {
      if(dao instanceof IBatisDAO)
        ((IBatisDAO) dao).endTransaction();
    }

    return db;
  }
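
  // Illustrative note: keys in the returned Hashtable take the form
  // "<schema> - <type> - <uniquename>", e.g. (placeholder values)
  // "common - chromosome - chr1", mapping to the feature_id as a String.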
/**
* Get the data access object (DAO).
* @return data access object
*/
private ChadoDAO getDAO()
throws java.net.ConnectException, SQLException
{
if(!iBatis)
{
if(jdbcDAO == null)
jdbcDAO = new JdbcDAO((String)getLocation(), pfield);
return jdbcDAO;
}
else
{
if(connIB == null)
connIB = new IBatisDAO(pfield);
return connIB;
}
}
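
  // Illustrative note: the DAO is chosen once per JVM via system
  // properties, e.g. (main class and values are placeholders)
  //
  //   java -Dibatis -Dchado=localhost/chado?user=me uk.ac.sanger.artemis.Main
  //
  // Without -Dibatis the plain JDBC DAO is used.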
  /**
   * Create a new OutputStream object from this Document. The contents of
   * the Document can be written from the stream.
   *
   * @exception IOException
   *              Thrown if the Document can't be written.
   */
public OutputStream getOutputStream() throws IOException
{
final File write_file = new File(System.getProperty("user.dir")+
System.getProperty("file.separator")+
getName());
final FileOutputStream file_output_stream =
new FileOutputStream(write_file);
if(write_file.getName().endsWith(".gz"))
{
// assume this file should be gzipped
return new java.util.zip.GZIPOutputStream (file_output_stream);
}
    else
      return file_output_stream;
  }
}