diff --git a/corba/apollo.idl b/corba/apollo.idl new file mode 100644 index 0000000000000000000000000000000000000000..49b097261fdecd9666cd05900640a6125aeac5f1 --- /dev/null +++ b/corba/apollo.idl @@ -0,0 +1,206 @@ +// Apollo transport layer +// all syntax subject to change +// typedef sequence <Xxx> XxxList assumed +// maybe some of the paramsets should be +// more strongly typed +// +// Ewan Birney and Chris Mungall. Apollo list apollo@ebi.ac.uk +// + +module Apollo { + + exception NotSupported { string reason; }; // more exceptions to come + exception ProcessError { string reason; }; + exception OutOfRange { string reason; }; + exception NeedsUpdate { string reason;}; + + struct Param { + string name; + string value; + }; + typedef sequence <Param> ParamList; + + // use an enum for unit (%, ratio, etc)? + struct Score { + string type; + string value; + // EB - let's also have this as a number. Clients have to figure out by the + // type. + double double_value; + }; + typedef sequence <Score> ScoreList; + + // we should abstract out identifier aspects into + // a separate struct - that way we can attach this to + // any kind of object + struct Identifier { + string name; //main display label + string description; //detailed desc + sequence <string> synonyms; + // DbXrefList dbxrefs; + }; + typedef sequence <Identifier> IdentifierList; + + + enum StrandType { plus, minus}; + // a range can be attached to any seq-featurey object + struct Range { + long range_min; + long range_max; + StrandType strand; + }; + + + struct ResultSpan { + string result_id; + ScoreList scores; + Range range1; + Range range2; + }; + typedef sequence<ResultSpan> ResultSpanList; + + + // any kind of analysis result or alignment + // (genscan-gene, genscan-exon, sim4exonset, sim4exon, blasthit, + // blast-hsp, etc) + struct ResultSet { + string result_id; + ScoreList scores; + string type; + ResultSpanList ResultSpans; // eg hsps for a blast hit + Range range1; // eg query start/end + Range
range2; // eg subject start/end + }; + typedef sequence <ResultSet> ResultSetList; + + + + // Evidence is one of the Result lists + typedef string Evidence; + + typedef sequence <Evidence> EvidenceList; + + // collection of analysis results + struct Analysis { + // eg Blast, Pfam + string program; + ParamList parameters; + ResultSetList results; // the result sets belonging to this analysis + }; + typedef sequence <Analysis> AnalysisList; + + // Annotation Comments etc + + + struct Person { + string readable_name; + string person_id; + }; + + typedef long TimeStamp; // NOTE(review): epoch/units are not stated here; IDL long is 32-bit - confirm it is wide enough + + + struct Comment { + string comment_id; + string text; + Person person; + TimeStamp time; + }; + typedef sequence<Comment> CommentList; + + + + // Design decision: most of these inherit from a notional seqfeature + // superclass - do we (1) merge them into a single struct, with + // the seqfeature struct having 'type' and contained-seqfeatures + // or (2) have distinct structs and delegate out the commonalities. + // I chose the latter, with all the structs having a Range attribute + + + // to fetch the sequence for a gene, it has to be spliced + // from the exons + struct Exon { + Identifier ident; + Range range; // NOTE(review): member name differs from its type only by case; IDL compilers that enforce case-insensitive name clashes may reject it - confirm + EvidenceList evidence_list; + }; + + typedef sequence <Exon> ExonList; + + struct Transcript { + Identifier ident; + ExonList exons; + Range cds_Range; // start/end of translation + // note we don't need range including UTR, it's implicit from exons + EvidenceList evidence_list; + CommentList comments; + }; + typedef sequence <Transcript> TranscriptList; + + enum GeneType { PROTEIN_CODING_GENE, TRNA_GENE, TRANSPOSON_GENE }; + + // Where does silly text annotation go?
+ struct AnnotatedGene { + GeneType type; + Identifier ident; + TranscriptList transcripts; + CommentList comments; + }; + typedef sequence <AnnotatedGene> AnnotatedGeneList; + + struct GenericAnnotation { + Identifier ident; + string type; + ParamList qualifiers; + CommentList comments; + Range range; + EvidenceList evidence_list; + }; + + typedef sequence <GenericAnnotation> GenericAnnotationList; + + + + // collection of annotations and analyses on + // a particular piece of sequence + // (could be a clone, a contig, a scaffold (ordered & oriented contigs), an + // arbitrary slice of a scaffold, a chromosome, etc) + interface AnnotatedRegion { + // bind sequence here + string sequence_as_string(); + string sequence_region_as_string(in long start,in long end) + raises ( OutOfRange ); + + // gets + AnalysisList get_analysis_list() raises ( ProcessError ) ; + AnnotatedGeneList get_gene_list() raises (ProcessError); + GenericAnnotation get_generic_annotation() raises (ProcessError); // NOTE(review): returns a single item while save_GenericAnnotation takes lists - confirm GenericAnnotationList was not intended + + + // sets: each save takes three lists (new, updated, dead) + void save_AnnotatedGenes(in AnnotatedGeneList new, + in AnnotatedGeneList updated, + in AnnotatedGeneList dead) + raises (NeedsUpdate, ProcessError,OutOfRange); + + void save_GenericAnnotation(in GenericAnnotationList new, + in GenericAnnotationList updated, + in GenericAnnotationList dead) + raises (NeedsUpdate, ProcessError, OutOfRange ); + }; + + // session [or persistence handle] + interface Session { + void connect(in ParamList param_set); + AnnotatedRegion get_AnnotatedRegion(in string id); + }; + + // singleton + interface SessionManager { + Session initiate_Session(in ParamList param_set); + Session retrieve_Session(in ParamList param_set) + raises (NotSupported); + }; + + +}; diff --git a/corba/ensembl.idl b/corba/ensembl.idl new file mode 100644 index 0000000000000000000000000000000000000000..1c5eae45d09612bd3ccc7c46662188a7a28a6d9c --- /dev/null +++ b/corba/ensembl.idl @@ -0,0 +1,53 @@ + +module Ensembl { + module artemis { + + exception RequestedSequenceTooLong
{ }; + exception NoEntry { string reason; }; + + interface BioSequence { + string getSubSequence(in long start,in long end) raises (RequestedSequenceTooLong); + long length(); + long max_sequence_request(); + }; + + struct Qualifier { + string name; + sequence<string> values; + }; + + typedef sequence <Qualifier> QualifierList; + + interface Feature { + string getKey(); + string getLocation(); + QualifierList getQualifiers(); + }; + + typedef sequence <Feature> FeatureList; + + interface Entry { + string getName(); // accession number usually. + long getFeatureCount(); + FeatureList getAllFeatures(); + BioSequence getSequence(); + }; + + typedef sequence<string> QualifierDefinitionList; + struct FeatureDefinition { + string key; + QualifierDefinitionList qualifiers; + }; + typedef sequence<FeatureDefinition> FeatureDefinitionList; + + typedef sequence<string> EntryNameList; + + interface DB { + Entry getEntry(in string entryname) raises (NoEntry); + EntryNameList getallEntryNames(); + + // coordination of the ORB. + FeatureDefinitionList getFeatureDefinitionList(); + }; + }; +}; diff --git a/corba/nsdb.idl b/corba/nsdb.idl new file mode 100644 index 0000000000000000000000000000000000000000..4d0108904b13c5d3cab5be4adf9b05a58ed873a7 --- /dev/null +++ b/corba/nsdb.idl @@ -0,0 +1,560 @@ +/* ************************************************************************** + * $Source: //tmp/pathsoft/artemis/corba/nsdb.idl,v $ + * $Revision: 1.1 $ + * $Date: 2004-06-09 12:06:34 $ + * $Author: tjc $ + * **************************************************************************/ +// Version 2.0 +#ifndef embl_ebi_nsdb_idl +#define embl_ebi_nsdb_idl + +#include "types.idl" +#include "seqdb.idl" + + + /** + * IDL interfaces for the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/embl_db.html"> + * EMBL Nucleotide Sequence Database</A>.
+ */ + module nsdb { + // ==================== + // Forward declarations + // ==================== + interface NucSeq; + typedef sequence<NucSeq> NucSeqList; + interface NucFeature; + typedef sequence<NucFeature> NucFeatureList; + interface Location; + typedef sequence<Location> LocationList; + interface FeatureLocation; + typedef sequence<FeatureLocation> FeatureLocationList; + interface EntryInfo; + typedef sequence<EntryInfo> EntryInfoList; + + + /** + * If a sub-sequence is retrieved for which the location information is + * inexact, an InexactLocation is raised + */ + exception InexactLocation { string reason;}; + + /** + * The EMBL database contains information which is not really part of the + * sequence information. This information is stored in the EntryInfo. + */ + + interface EntryInfo { + + /** + * Retrieve entry identifier. More information on the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/usrman/id_line.html">format</A> of + * an entry name is available in the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/usrman/usrman.html"> + * EMBL User Manual</A>. + */ + string getEntryName(); + + /** + * The entry version defines the current version of an EMBL Sequence Database + * Entry, i.e. the complete set of information related to a particular sequence. + * The Entry version is incremented whenever anything changes in the sequence or its + * associated information. + */ + unsigned long getEntryVersion(); + + + /** + * Get entry status code. + * @see meta::nsdb + */ + string getEntryStatus(); + + + /** + * Sequence of revisions when Entry was created/changed. + */ + type::RevisionList getRevisions(); + + /** + * List of secondary accession numbers, i.e. accession numbers + * of deprecated entries, now merged into the current entry or + * split over multiple entries, as described in the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/usrman/ac_line.html"> + * AC line</A> documentation in the User manual.
+ */ + type::stringList getSecondaryIds(); + + /** + * number of adenine (A) bases + */ + unsigned long getCountA(); + + /** + * number of cytosine (C) bases + */ + unsigned long getCountC(); + + /** + * number of guanine (G) bases + */ + unsigned long getCountG(); + + /** + * number of thymine (in DNA sequence) + * or + * number of uracil (in RNA sequence) + */ + unsigned long getCountT(); + + + }; + + // ========================= + // Location + // ========================= + + + /** + * A location is built from one or more nodes forming a tree + * local node IDs are only unique within a location tree + * the root node has id == 0 + * the Node id defines the position of the node in the sequence<LocationNode> + * The value is relative to the current (parent) node + * e.g. if the current node is at position x in the LocationNodes sequence + * and there is a childId with value j + * then the position of this child in the LocationNodes sequence will be x+j + */ + interface Location { + + typedef sequence<unsigned long> IdList; + + /** + * valid types of Location Nodes in a Location + * are defined by LocNodeTypeCode. Currently values 1-4 are in use. + * 6-10 are reserved for future use. + */ + typedef long LocNodeTypeCode; + + /** + * The sequence segment is derived from another sequence + */ + const long VirtualSegment_ltc = 1; + + /** + * The sequence segment is explicitly given as a DNA string. + */ + const long PhysicalSegment_ltc = 2; + + /** + * Sequence is unknown. Only its (estimated) size is known. + */ + const long Gap_ltc = 3; + + /** + * defines how to link a set of LocationNodes + */ + const long Operator_ltc = 4; + + + + /** + * Virtual segment of a sequence. The sequence segment is derived from another sequence.
+ * <p><dl> + * <dt>bio_seq_id + * <dd>sequence from which this segment is derived + * <dd>contains an accession number + * <dt>start + * <dd>start position of segment (inclusive) + * <dt>end + * <dd>end position of segment (inclusive) + * <dd>In the case that start defines a 'between' position, end is unused + * <dt>complement + * <dd> true if segment should be complemented before any further manipulations + * </dl> + */ + struct LocVirtualSegment { + string bio_seq_id; + type::Fuzzy start; + type::Fuzzy end; + boolean complement; + }; + + /** + * spacer between sequence fragments + */ + typedef type::Fuzzy LocGap; + + /** + * The sequence segment is explicitly given as a DNA string + * that should be inserted literally. + */ + typedef string LocPhysicalSegment; + + /** + * Location operator. This is a node in the location tree that combines + * nodes lower down in the tree + * <dl> + * <dt> op + * <dd> operator defining what to do with the nodes + * <dd> <dl> + * <dt> join + * <dd> The indicated elements should be joined (placed end-to-end) + * to form one contiguous sequence + * <dt> order + * <dd> The elements can be found in the specified order + * (5' to 3' direction), but nothing is implied about the + * reasonableness of joining them + * </dl> + * <dt> childIds + * <dd> identifiers of the child nodes + * <dd> Ids is an array of IDs relative to the current node + * e.g. if the current node is at position x in the LocationNodes sequence + * and there is a childId with value j + * then the position of this child in the LocationNodes sequence will be x+j + *</dl> + */ + struct LocOperator { + string op; + IdList childIds; + }; + + union LocationNode_u switch (LocNodeTypeCode) { + case VirtualSegment_ltc: LocVirtualSegment virtual; + case PhysicalSegment_ltc: LocPhysicalSegment physical; + case Gap_ltc: LocGap gap; + case Operator_ltc: LocOperator operator; + }; + + typedef sequence<LocationNode_u> LocationNodeList; + + /** + * retrieve + * <A
href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/components.html#location"> + * string representation of location</A> + */ + string getLocationString(); + + + /** + * retrieve tree representation of location + */ + LocationNodeList getNodes(); + + + + /** + * Create nucleotide sequence defined by location. This can imply getting + * fragments from multiple sequences and concatenating. + * If it is not possible to resolve the location into a single sequence + * (e.g. when it contains a 'group' operator, or gap nodes) + * each fragment will be returned as a separate string. No assumption should be + * made on the order, if multiple fragments are returned. NOTE(review): the return type is a single string, so how several fragments are delivered is not visible here - confirm + * @raises InexactLocation if an exact sequence cannot be + * determined due to the location being inexact + */ + string getSeq() + raises ( InexactLocation ); + + }; + /** + * Location of a NucFeature + * This interface does not allow changing the nuc_feature + * If a location is assigned to a nucfeature, the inverse relation + * should be properly updated + * @see NucFeature.getLocation + */ + interface FeatureLocation : Location { + + /** + * nucfeature with which the location is associated + */ + NucFeature getNucFeature(); + }; + + + // ============================ + // Features + // ============================ + /** + * Nucleotide Sequence Feature interface. Features are <I>owned</I> by a + * sequence and contain information about that (and maybe other) sequence. + * The relation between the feature and the sequence is defined by its + * location. + * @see Location + */ + + interface NucFeature : seqdb::Feature { + + /** + * Qualifier TypeCode definitions. + * Each Qualifier type has an assigned typecode. Values 1-100 + * are reserved to allow for future extension. + * <p> + * This is a stripped down version of the <B>featuremeta</B> IDL. Types which + * are typedefs of the same base-type in <B>featuremeta</B> are not + * distinguished here.
+ * @see featuremeta + */ + typedef long QualifierTypeCode; + + // values 1-100 are reserved for EBI QualifierTypeCodes; only 1-4 and 17-22 are assigned below + const long string_qtc = 1; + const long boolean_qtc = 2; + const long integer_qtc = 3; + const long real_qtc = 4; + const long TranslationException_qtc = 17; + const long CodonTranslation_qtc = 18; + const long Anticodon_qtc = 19; + const long SpliceConsensus_qtc = 20; + const long RepeatUnit_qtc = 21; + const long DbXref_qtc = 22; + + + union QualifierValue_u switch (QualifierTypeCode) { + + // EBI typecodes + case boolean_qtc : boolean applicable; + case string_qtc : string text; + case integer_qtc : long integer; + case real_qtc : float real; + case TranslationException_qtc : type::TranslationException translation_exception; + case CodonTranslation_qtc : type::CodonTranslation codon_translation; + case Anticodon_qtc : type::AntiCodon anti_codon; + case SpliceConsensus_qtc : type::SpliceConsensus splice_consensus; + case RepeatUnit_qtc : type::RepeatUnit repeat_unit; + case DbXref_qtc : type::DbXref db_xref; + + // add your own extension types below +#ifdef ANALYSIS + // analysis qualifier extension (compiled in only when ANALYSIS is defined) + // see analysis.idl + case ANALYSIS::Analysis_qtc : ANALYSIS::Scores score; +#endif + }; + + + typedef sequence<QualifierValue_u> QualifierValueList; + + /** + * Qualifier.<p> + * <dl> + * <dt> name + * <dd> name of the qualifier + * <dt> values + * <dd> sequence of QualifierValues. All QualifierValues associated with + * a single Qualifier are of the same type + * </dl> + */ + struct Qualifier { + string name; + QualifierValueList values; + }; + + typedef sequence<Qualifier> QualifierList; + + /** + * Retrieve sequence of qualifiers. + * @raises type::NoResult if no qualifiers are associated with + * the feature. + */ + QualifierList getQualifiers() + raises (type::NoResult); + + + + /** + * retrieve qualifier of a certain type. + * To find out which qualifier/feature combinations are valid, a client should + * query the NucFeatureMeta server.
+ * @raises type::NoResult if no qualifier of this type is associated with + * the feature + * @raises type::InvalidRelation if the requested qualifier cannot be + * associated with the current feature type + * @see metafeature::NucFeatureMeta + */ + Qualifier getQualifier(in string qualifier_name) + raises (type::NoResult, type::InvalidRelation); + + + /** + * If the location of the feature references multiple sequences, get a + * sequence of all sequences referenced + * This method is equivalent to retrieving the feature location, and looping + * through all location nodes to find the referenced sequences, converting + * the accession numbers into DbXrefs. + */ + type::DbXrefList getNucSeqs(); + + /** + * retrieve location of feature. + * @raises type::NoResult if no location is associated with the feature. + */ + + FeatureLocation getLocation() + raises (type::NoResult); + + + }; + + /** + * Generic Nucleotide sequence interface. + * The accession number is retrieved using the getBioSeqId method inherited from BioSeq. + * @see EmblSeq + * @see seqdb::BioSeq + */ + + interface NucSeq : seqdb::BioSeq { + + + /** + * retrieve string representation of nucleotide sequence. Each character + * in the string is a + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/iupac_codes.html"> + * IUPAC nucleotide base code</A> + * <P> + * This method can be used instead of the getAnySeq() + * method in seqdb::BioSeq + * @see seqdb::BioSeq + */ + string getSeq(); + + + /** + * Checksum on sequence to allow validation. + * + */ + unsigned long getCheckSum(); + + /** + * topology of the nucleotide sequence + * @see meta::nsdb + */ + string getTopology(); + + + /** + * molecule type of the nucleotide sequence + * @see meta::nsdb + */ + string getMoleculeType(); + + + /** + * retrieve the NucFeatureList associated with + * the nucleotide sequence. + * A sequence has <I>ownership</I> of all these + * features.
It is possible on the other hand that + * features, owned by another sequence, reference + * the current sequence. Currently there is no way to find out. + * @raises type::NoResult if no features are owned by the sequence + */ + NucFeatureList getNucFeatures() + raises (type::NoResult); + + + /** + * A location of a NucFeature can span multiple sequences. + * If only the location of a feature relevant to the current + * sequence is required, this method will calculate that. + * @parm nuc_feature Feature whose location needs to be + * intersected with the current sequence. + * @raises type::InvalidRelation if the nuc_feature + * is not associated with the current sequence + */ + Location getLocalLocation(in NucFeature nuc_feature) + raises (type::InvalidRelation); + + /** + * organism(s) from which the NucSeq was obtained. + * If the sequence is chimeric, multiple organisms will be returned. + * For each organism, there should be a source feature associated + * with the current sequence. This source feature has a location + * defining which part of the sequence was derived from the specified + * organism. + * This method provides a shortcut to: + * invoke getNucFeaturesByKey("source"), invoke getQualifiers() on each + * source feature and loop through qualifiers to find DbXref. + * @raises type::NoResult if no source features are associated with + * the sequence (should never happen). + */ + type::DbXrefList getOrganisms() + raises (type::NoResult); + + /** + * retrieve all features of a specific kind (FeatureKey) + * @parm key Type of features to be retrieved + * @raises type::NoResult if the sequence has no associated features + * of the requested type.
+ * @raises type::InvalidArgumentValue if the key is not a + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/feature_table.html"> + * valid feature key</A> + * @see NucFeature + */ + NucFeatureList getNucFeaturesByKey(in string key) + raises (type::NoResult, type::InvalidArgumentValue); + + /** + * Create nucleotide sequence of the segment specified + * The first base in the sequence is numbered 1 + * + * @parm start first base of sub-sequence (inclusive) + * @parm end last base of sub-sequence (inclusive) + * @raises type::IndexOutOfRange if start < 1 + * or end > length + */ + string getSubSeq(in unsigned long start, in unsigned long end) + raises (type::IndexOutOfRange); + + /** + * Create nucleotide sequence of the location derived from the specified feature + * and contained in the current NucSeq. + * @raises type::InvalidRelation if the feature + * is not associated with the current sequence + * @raises InexactLocation if an exact sequence cannot be + * determined due to the feature's location being inexact + */ + string getSubSeqByFeature(in NucFeature feature) + raises (type::InvalidRelation, InexactLocation); + /** NOTE(review): no summary was written; presumably returns the Location tied to reference_id - confirm + * @raises type::InvalidRelation if reference does not exist + * @raises type::InvalidRelation if reference is not + * associated with the current sequence + * @raises type::NoResult if there is no location associated with this reference + */ + Location getReferenceLocation(in string reference_id) + raises (type::InvalidRelation, type::NoResult); + }; + + /** + * EMBL Nucleotide sequence interface. An EMBL sequence contains all information + * to generate a flat-file entry out of it. + * It defines no new behaviour but inherits from + * several interfaces and acts as a container of the combined functionality. + */ + + interface EmblSeq : NucSeq, seqdb::SeqInfo , EntryInfo { + }; + + +/** + * Entry point for Nucleotide Sequence Database. + * Query methods on the database are defined here.
+ * Currently this is minimal, but should be extended later on. + */ +interface Embl { + +exception Superceded { type::stringList bio_seq_ids; }; + + /** + * retrieve Nucleotide sequence, given its accession number. If a client only supports + * NucSeq, and not EmblSeq, then it can widen the EmblSeq to a NucSeq (implicit). + * @raises type::NoResult if the accession number does not exist. + * @raises Superceded if the sequence referenced by the accession + * number does not exist any more, because it was merged, or split. + */ + EmblSeq getEmblSeq(in string bio_seq_id) + raises (type::NoResult, Superceded); +}; +}; +#endif // embl_ebi_nsdb_idl diff --git a/corba/nsdb_write.idl b/corba/nsdb_write.idl new file mode 100644 index 0000000000000000000000000000000000000000..27a1720655d1122a4e344e89a3fff85bd27999bd --- /dev/null +++ b/corba/nsdb_write.idl @@ -0,0 +1,260 @@ +#ifndef EMBL_EBI_NSDB_WRITE_IDL +#define EMBL_EBI_NSDB_WRITE_IDL + +#include "nsdb.idl" + +module nsdb { + exception OutOfDate { }; + + exception ReadOnlyException { }; + + exception LocationParse { + string reason; + }; + + exception QualifierParse { + string reason; + }; + + exception InvalidQualifier { }; + + interface EmblSeqWriter; + interface NucFeatureWriter; + + typedef sequence<NucFeatureWriter> NucFeatureWriterList; + + + // datestamp_t is seconds since the standard epoch ("the epoch"), namely + // January 1, 1970, 00:00:00 GMT. NOTE(review): IDL long is 32-bit, so this value overflows in 2038 + typedef long datestamp_t; + + struct Datestamp { + datestamp_t value; + }; + + struct EntryStats { + string name; + datestamp_t last_change_time; + }; + + struct ServerInfo { + // Information about the loaded entries + sequence <EntryStats> entry_stats_list; + // Information about the files in the server directory + sequence <EntryStats> file_stats_list; + }; + + interface EmblWriter : Embl { + + /** + * Retrieve a writable Nucleotide sequence object, given a + * sequence_id. + * @raises type::NoResult if the given sequence_id does not exist.
+ */ + EmblSeqWriter getEmblSeqWriter (in string sequence_id) + raises (type::NoResult); + + /** + * Return a ServerInfo structure for the server. + **/ + ServerInfo getServerInfo (); + }; + + + exception InvalidKey { }; + + + exception CommitFailed { + string reason; + }; + + interface EmblSeqWriter : EmblSeq { + + /** + * Create a new feature in this EmblSeq object. + * @parm key Type of the feature to be created + * @parm location The location of the new NucFeature + * @raises LocationParse If the location string is not a valid location. + * @raises type::IndexOutOfRange If any part of the location is beyond the + * end of the sequence + * @raises InvalidKey if the given key is not a possible EMBL key. NOTE(review): ReadOnlyException is also in the raises clause but is not described here. + */ + NucFeatureWriter createNucFeature (in string key, + in string location) + raises (LocationParse, type::IndexOutOfRange, InvalidKey, + ReadOnlyException); + + /** + * Remove the given feature. + */ + void remove (in NucFeature nuc_feature) + raises (ReadOnlyException); + + /** + * retrieve the NucFeatureList associated with + * the nucleotide sequence that are within the given range of bases. + * @raises type::NoResult if no features are owned by the sequence + * @raises type::IndexOutOfRange if either of start_base or end_base is + * less than 1 or greater than the length of the sequence. + */ + NucFeatureList getNucFeaturesInRange (in long start_base, in long end_base) + raises (type::NoResult, type::IndexOutOfRange); + + + /** + * Return the number of features + */ + long getNucFeatureCount (); + + /** + * Return the ith NucFeature from this Entry. The features are returned in + * a consistent order, sorted by the first base of each Feature. + * @raises type::IndexOutOfRange if the index is less than 0 or greater + * than the number of features. + **/ + NucFeature getFeatureAtIndex (in long i) + raises (type::IndexOutOfRange); + + /** + * Return the index of the given Feature. This does the reverse of + * getFeatureAtIndex ().
Returns -1 if the given NucFeature is not in + * this EmblSeq object. + **/ + long indexOf (in NucFeature feature); + + /** + * commit any pending changes to the database immediately. + */ + void commit () + raises (CommitFailed); + + /** + * Return a Datestamp that will be passed to the set methods of + * EmblSeqWriter and NucFeatureWriter. The object that is + * returned represents the time when the entry was last changed (the last + * time a feature was added, removed or changed location). + */ + Datestamp getDatestamp (); + }; + + + interface NucFeatureWriter : NucFeature { + /** + * Set the key, location and qualifiers of this NucFeature + * @parm key The new feature key + * @parm location The new feature location + * @raises InvalidKey if the given key is not a possible EMBL key. + * @raises LocationParse If the location string is not a valid location. + * @raises type::IndexOutOfRange If any part of the location is out + * of range for the sequence. + * @raises type::InvalidRelation if one of the qualifiers in this + * feature cannot be associated with the given feature key. + * @raises QualifierParse if the format of any of the qualifiers is not + * appropriate for a Qualifier with the given name. For + * example the value part of /codon_start qualifier must be a number: 1, + * 2 or 3. Also thrown if a qualifier has a value when it should not or + * vice versa. + * @raises InvalidQualifier if the name of the Qualifier is not a + * valid embl qualifier name. + * @raises ReadOnlyException If this Feature cannot be changed. + * @raises OutOfDate If the key has changed since the time given by + * datestamp.
+ */ + void set (in Datestamp datestamp, + in string key, + in string location, + in QualifierList qualifier_list) + raises (InvalidKey, LocationParse, type::IndexOutOfRange, + type::InvalidRelation, QualifierParse, + InvalidQualifier, OutOfDate, ReadOnlyException); + + /** + * Set the key of this NucFeature + * @parm key The new feature key + * @raises InvalidKey if the given key is not a possible EMBL key. + * @raises type::InvalidRelation if one of the qualifiers in this + * feature cannot be associated with the given feature key. + * @raises OutOfDate If the key has changed since the time given by + * datestamp. + */ + void setKey (in Datestamp datestamp, + in string key) + raises (InvalidKey, type::InvalidRelation, OutOfDate, ReadOnlyException); + + /** + * Set the location of this NucFeature + * @parm location The new feature location + * @raises LocationParse If the location string is not a valid location. + * @raises type::IndexOutOfRange If any part of the location is out + * of range for the sequence. + * @raises OutOfDate If the location has changed since the time given by + * datestamp. + */ + void setLocation (in Datestamp datestamp, + in string location) + raises (LocationParse, type::IndexOutOfRange, OutOfDate, ReadOnlyException); + + /** + * Set the qualifiers of this feature, replacing the current qualifiers. + * @raises type::InvalidRelation if this Feature cannot + * contain one of the given qualifiers. + * @raises QualifierParse if the format of any of the qualifiers is not + * appropriate for a Qualifier with the given name. For + * example the value part of /codon_start qualifier must be a number: 1, + * 2 or 3. Also thrown if a qualifier has a value when it should not or + * vice versa. + * @raises InvalidQualifier if the name of the Qualifier is not a + * valid embl qualifier name. + * @raises OutOfDate if any of the qualifiers has changed since the time + * given by datestamp.
+ */ + void setQualifiers (in Datestamp datestamp, + in QualifierList qualifier_list) + raises (type::InvalidRelation, QualifierParse, InvalidQualifier, + OutOfDate, ReadOnlyException); + + /** + * Add the given Qualifier to this Feature. If this Feature contains a + * Qualifier with the same name as the new Qualifier it will be replaced. + * @parm qualifier The new qualifier to add. + * @raises type::InvalidRelation if this Feature cannot + * contain the given Qualifier. + * @raises QualifierParse if the format of the qualifier is not + * appropriate for a Qualifier with the given name. For + * example the value part of /codon_start qualifier must be a number: 1, + * 2 or 3. Also thrown if a qualifier has a value when it should not or + * vice versa. + * @raises InvalidQualifier if the name of the Qualifier is not a + * valid embl qualifier name. + * @raises OutOfDate if there is an existing qualifier with the same + * name as the argument qualifier and it has changed since the time + * given by datestamp. + */ + void setQualifier (in Datestamp datestamp, + in Qualifier qualifier) + raises (type::InvalidRelation, QualifierParse, InvalidQualifier, + OutOfDate, ReadOnlyException); + + /** + * Remove the Qualifier with the given name. If there is no Qualifier + * with that name, then return immediately. + * @parm name The Qualifier name to look for. + * @raises OutOfDate if there is an existing qualifier with the same + * name as the argument name and it has changed since the time given by + * the datestamp. NOTE(review): the operation name is spelled "Qualifer"; renaming it would change the interface. + */ + void removeQualiferByName (in Datestamp datestamp, + in string name) + raises (type::InvalidRelation, OutOfDate, ReadOnlyException); + + /** + * Return a Datestamp that will be passed to the set methods of + * EmblSeqWriter and NucFeatureWriter. The object that is + * returned represents the time when the feature was last changed (the + * last time the key, location or qualifiers changed).
+ */ + Datestamp getDatestamp (); + }; +}; + +#endif diff --git a/corba/seqdb.idl b/corba/seqdb.idl new file mode 100644 index 0000000000000000000000000000000000000000..7be3f25a68c24eeda6493fc254e21189eeebcdfd --- /dev/null +++ b/corba/seqdb.idl @@ -0,0 +1,131 @@ +/* ************************************************************************** + * $Source: //tmp/pathsoft/artemis/corba/seqdb.idl,v $ + * $Revision: 1.1 $ + * $Date: 2004-06-09 12:06:36 $ + * $Author: tjc $ + * **************************************************************************/ + +#ifndef embl_ebi_seqdb_idl +#define embl_ebi_seqdb_idl + +#include "types.idl" +/** + * seqdb contains the (abstract) definition + * of common attributes of biosequences + */ + module seqdb { + + + /** + * Generic biosequence. Provides all functionality we would like to see + * on any sequence. + */ + interface BioSeq { + + /** + * Retrieve the unique identifier. + */ + string getBioSeqId(); + + + + /** + * Length (number of elements) of the biosequence. + */ + unsigned long getLength(); + + + /** + * Sequence of objects describing the elements in the biosequence. + * This is a generic description. Most subclasses + * will define more convenient methods for accessing the biosequence. + * @returns any containing a set of objects. The any should have + * a typecode tk_array. + */ + any getAnySeq(); + + + + /** + * Return current version of the BioSeq. Returns 0 if versioning is not + * implemented on the bioseq. + */ + unsigned long getBioSeqVersion(); + + }; + + /** + * Generic Feature. Only has a key to identify its type, an ID to + * identify the instance and a version. + */ + interface Feature { + + /** + * Feature identifier. + */ + string getFeatureId(); + + + + /** + * Feature types are defined by a key. + */ + string getKey(); + + + + /** + * Return current version of the feature. Returns 0 if + * no versioning is implemented. 
+ */ + unsigned long getFeatureVersion(); + + }; + + /** + * Information associated with a sequence + */ + interface SeqInfo { + /** + * short (one line) description + */ + string getDescription() + raises (type::NoResult); + + + /** + * sequence of keywords, describing the characteristics of the sequence + */ + type::stringList getKeywords() + raises (type::NoResult); + + + /** + * sequence of comments, describing the characteristics of the sequence + */ + type::stringList getComments() + raises (type::NoResult); + + + /** + * cross references to other databases containing related or additional + * information + */ + type::DbXrefList getDbXrefs() + raises (type::NoResult); + + /** + * cross references to the EMBL publication database + * information + */ + type::DbXrefList getReferences() + raises (type::NoResult); + + }; + + }; + + + + +#endif // embl_ebi_seqdb_idl diff --git a/corba/types.idl b/corba/types.idl new file mode 100644 index 0000000000000000000000000000000000000000..717f3cb6e3ac70113252b61b0cd6dc5f167dacc7 --- /dev/null +++ b/corba/types.idl @@ -0,0 +1,334 @@ +/* ************************************************************************** + * $Source: //tmp/pathsoft/artemis/corba/types.idl,v $ + * $Revision: 1.1 $ + * $Date: 2004-06-09 12:06:37 $ + * $Author: tjc $ + * **************************************************************************/ +#ifndef embl_ebi_types_idl +#define embl_ebi_types_idl +/** + * The type module contains types (typedefs, structs, exceptions and + * constants) that are shared between several modules of the EMBL::EBI suite. + * It should be included within the module EMBL::EBI. + */ +module type { + /** + * sequence of strings. + */ + typedef sequence<string> stringList; + + typedef sequence<long> longList; + + typedef sequence<unsigned long> ulongList; + + /** + * If a query in a database returns no results, a NoResult exception is raised + */ + exception NoResult {}; + + /** + * If no write permission is granted (i.e. 
a set-method cannot be performed) + * the NoWritePermission is raised + */ + exception NoWritePermission { string reason;}; + + /** + * If an object reference given as an input parameter is invalid, in the + * context of the current interface, an InvalidRelation exception is raised + */ + exception InvalidRelation { string reason;}; + + /** + * If a number indicating a position in a sequence is + * outside the limits of the sequence, or more elements are associated with + * an object than it can handle, + * an IndexOutOfRange exception is raised + */ + exception IndexOutOfRange { string reason; }; + +/** + * controlled values (i.e. an attribute can only contain a value taken + * from a well defined range of values, or an in parameter to a method has a + * restricted set of acceptable values) are presented as typecodes, used in + * a union. If a method tries to set a controlled value + * attribute/parameter to an invalid value, an InvalidArgumentValue is raised + */ + exception InvalidArgumentValue { string reason; }; + + + /** + * a sequence (e.g. 
string) with an expected defined format cannot be parsed + */ + exception ParseError { string reason; }; + + /** + * Date is a struct to describe a date, independent of any report format + * <dl> + * <dt>day + * <dd>day of month as a number between 1-31 (inclusive) + * <dt>month + * <dd> month of the year as a number between 1-12 (inclusive) + * <dt>year + * <dd>year as a 4 digit number + * </dl> + */ + struct Date { + unsigned short day; + unsigned short month; + unsigned short year; + }; + + typedef sequence<Date> DateList; + + + /** + * Information about a person: this is used to specify the authors + * <p><dl> + * <dt>surname + * <dd>the person's surname + * <dt>firstname + * <dd>the person's first name + * <dt>midinitial + * <dd>the person's middle initial + * </dl><p> + */ + struct Person { + string surname; + string firstname; + string midinitial; + }; + typedef sequence<Person> PersonList; + + /** + * Revision + * <dl> + * <dt>date + * <dd>datestamp of revision + * <dt>type + * <dd>type of revision. Valid types are defined in meta + * </dl> + */ + struct Revision { + Date date; + string type; + }; + + typedef sequence<Revision> RevisionList; + + /** + * Database cross-reference. The list of valid database identifiers + * used by the collaboration is defined at the + * <A href="http://www.ncbi.nlm.nih.gov/collab/db_xref.html">NCBI + collaborative web server</A> + * <dl> + * <dt>db + * <dd>database identifier + * <dt>primary_id + * <dd>object identifier in the database + * <dt>version + * <dd> if the referenced database supports versioning, version + * refers to the version of the object, when the cross-reference + * was generated. It can be used to verify that a cross-reference + * is up to date. + * <dt>label + * <dd>secondary identifier, possibly used to indicate a sub/super-part of + * the primary object or an alternative name for the primary id. 
+ * The label can contain information for the convenience + * of the user, but no assumptions should be made on the long-term + * stability of it. + * </dl> + */ + struct DbXref { + string db; + string primary_id; + unsigned long version; + string label; + }; + + typedef sequence<DbXref> DbXrefList; + + /** + * Amino Acid. This can be any amino acid, including modified or unusual ones. + * <P> + * <dl> + * <dt> code + * <dd> <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/aa_abbrevs.html"> + * IUPAC-IUB</A> one-letter code if the amino acid has one assigned + * <dd> otherwise the code will be a blank character + * <dt> name + * <dd> descriptive name of the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/aa_abbrevs.html"> + * Amino Acid</A> or + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/modified_aa.html"> + * modified or unusual Amino Acid</A> + * <dt> abbreviation + * <dd> abbreviated name of the + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/aa_abbrevs.html"> + * Amino Acid</A> or + * <A href="http://www.ebi.ac.uk/ebi_docs/embl_db/ft/modified_aa.html"> + * modified or unusual Amino Acid</A> + *</dl> + */ + struct AminoAcid { + char code; + string name; + string abbreviation; + }; + typedef sequence<AminoAcid> AminoAcidList; + + /** + * Translation rule specifying the amino acid encoded by a codon. + * Standard rules are defined by the genetic code of an organism. + * If a CDS uses non-standard rules, this can be annotated with + * qualifier codon (value type CodonTranslation_s). + * <p><dl> + * <dt> codon + * <dd> literal sequence of the codon. + * <dt> amino_acid + * <dd> amino acid used. + * <dd> No modified AA are allowed.</dl> + */ + struct CodonTranslation { + string codon; + AminoAcid amino_acid; + }; + typedef sequence<CodonTranslation> CodonTranslationList; + + /** + * Location of the anticodon of tRNA and the amino acid for which + * it codes. 
+ * <p> + * <dl> + * <dt> start + * <dd> start position of the anticodon + * <dt> end + * <dd> end position of the anticodon + * <dt> amino_acid + * <dd> amino acid used. + * <dd> No modified AA are allowed. + * </dl> + */ + struct AntiCodon { + long start; + long end; + AminoAcid amino_acid; + }; + typedef sequence<AntiCodon> AntiCodonList; + + + /** + * Translation exception of a single triplet within a sequence. + * <p> + * <dl> + * <dt> primary_acc + * <dd> This attribute is likely to change in the future. It is very rarely + * used, but it is needed for translation exceptions on CDSs + * spanning entries + * <dt> start + * <dd> start position of exception in the sequence. + * <dt> end + * <dd> end position of exception in the sequence + * <dt> amino_acid + * <dd> amino acid used in this exception. + * <dd> No modified AA are allowed. + *</dl> + */ + struct TranslationException { + string primary_acc; + long start; + long end; + AminoAcid amino_acid; + }; + typedef sequence<TranslationException> TranslationExceptionList; + + /** + * flag to indicate that the splice site consensus sequence is not + * present at one of the feature's splice junctions. + * If no qualifier is present, the default is that both splice sites + * contain the consensus + */ + struct SpliceConsensus { + boolean five_cons; + boolean three_cons; + }; + typedef sequence<SpliceConsensus> SpliceConsensusList; + + + /** + * A RepeatUnit identifies the exact unit that is being repeated. It can be + * either a base range, or can refer to the label of a labeled repeat_unit + * feature (but not both; this datatype is likely to be superseded by a + * union). + * <dt> start + * <dd> position of first base in the first occurrence of the repeated segment + * <dt> end + * <dd> position of the last base in the first occurrence of the repeated segment + * <dt> label + * <dd> Currently, usually a textual description of the repeating segment, + * but can refer to a labeled repeat_unit feature. 
+ * This attribute may in future become a proper DbXref to a feature; + */ + struct RepeatUnit { + long start; + long end; + string label; + }; + typedef sequence<RepeatUnit> RepeatUnitList; + + /** + * A position can be fuzzy. FuzzyTypeCode + * defines how to interpret the combination of value and size, to define + * the range of fuzziness. + * Currently values 1-5 are in use. + * 6-10 are reserved for future use. + */ + typedef long FuzzyTypeCode; + + + /** + * value is an exact position + */ + const long Exact_ftc = 1; + + /** + * A single base chosen from a range or span of bases is indicated + * by the first base number (value) and the last base number of the range + * (value+size) inclusive. + */ + const long In_ftc = 2; + + /** + * A site between two points (nucleotides), such as an endonucleolytic + * cleavage site. The site is a single position between 2 consecutive bases + * in the range. The range is defined by value, value+size (inclusive) + */ + const long Between_ftc = 3; + + + /** + * an end point is undefined but behind (and does not include) the + * base number specified in value. + * size is unused. NOTE(review): "behind" here vs "before" on Greater_ftc looks swapped - confirm. + */ + const long Less_ftc = 4; + + + /** + * an end point is undefined but before (and does not include) the + * base number specified in value. + * size is unused. + */ + const long Greater_ftc = 5; + + struct Fuzzy { + long value; + long size; + FuzzyTypeCode type; + }; + typedef sequence<Fuzzy> FuzzyList; + +}; + +#endif + + 