From 064ffdbc956cbde95424cc7d0853d78773f07b51 Mon Sep 17 00:00:00 2001
From: tjc <tjc@ee4ac58c-ac51-4696-9907-e4b3aa274f04>
Date: Wed, 21 Jun 2006 10:11:36 +0000
Subject: [PATCH] add extra_qualifiers

git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@4445 ee4ac58c-ac51-4696-9907-e4b3aa274f04
---
 etc/af063097.embl |  64 ++++----
 etc/b132222.embl  |  22 +--
 etc/c1215.genbank |  18 +--
 etc/options       |   6 +-
 etc/run_blastp    | 372 ++++++++++++++++++++++++++++++++++++++++++----
 etc/run_fasta     |  17 +--
 6 files changed, 406 insertions(+), 93 deletions(-)

diff --git a/etc/af063097.embl b/etc/af063097.embl
index 99cf9f44a..909837042 100644
--- a/etc/af063097.embl
+++ b/etc/af063097.embl
@@ -315,6 +315,7 @@ CC   gi:1806573 gi:215666 gi:506836 gi:508561 gi:1061043 gi:215669
 CC   gi:1769975 gi:494974 gi:215664 gi:15620 gi:393451 gi:1769977
 CC   gi:215681 gi:215675 gi:215676 gi:15141 gi:414530.
 XX
+XX
 FH   Key             Location/Qualifiers
 FH
 FT   source          1..33593
@@ -382,8 +383,8 @@ FT                   /evidence=EXPERIMENTAL
 FT                   /note="activator binding site; pO, pP promoters"
 FT   misc_feature    3127
 FT                   /evidence=EXPERIMENTAL
-FT                   /note="transcription start site for O operon; encodes O, N,
-FT                   M, L, X, Y, K, lysA, lysB, R, S"
+FT                   /note="transcription start site for O operon; encodes O,
+FT                   N, M, L, X, Y, K, lysA, lysB, R, S"
 FT   CDS             3167..4021
 FT                   /codon_start=1
 FT                   /db_xref="SWISS-PROT:P25478"
@@ -430,18 +431,18 @@ FT                   /note="Nam209 mutation"
 FT                   /replace="at"
 FT                   /gene="N"
 FT   variation       4630
-FT                   /note="sir3 mutation; defective for choice of small capsids
-FT                   by satellite phage P4"
+FT                   /note="sir3 mutation; defective for choice of small
+FT                   capsids by satellite phage P4"
 FT                   /replace="c"
 FT                   /gene="N"
 FT   variation       4695
-FT                   /note="sir5 mutation; defective for choice of small capsids
-FT                   by satellite phage P4"
+FT                   /note="sir5 mutation; defective for choice of small
+FT                   capsids by satellite phage P4"
 FT                   /replace="t"
 FT                   /gene="N"
 FT   variation       4695..4697
-FT                   /note="sir9 mutation; defective for choice of small capsids
-FT                   by satellite phage P4"
+FT                   /note="sir9 mutation; defective for choice of small
+FT                   capsids by satellite phage P4"
 FT                   /replace=""
 FT                   /gene="N"
 FT   variation       4699
@@ -450,13 +451,13 @@ FT                   of small capsids by satellite phage P4"
 FT                   /replace="t"
 FT                   /gene="N"
 FT   variation       4740
-FT                   /note="sir1 mutation; defective for choice of small capsids
-FT                   by satellite phage P4"
+FT                   /note="sir1 mutation; defective for choice of small
+FT                   capsids by satellite phage P4"
 FT                   /replace="g"
 FT                   /gene="N"
 FT   variation       4741
-FT                   /note="sir4 mutation; defective for choice of small capsids
-FT                   by satellite phage P4"
+FT                   /note="sir4 mutation; defective for choice of small
+FT                   capsids by satellite phage P4"
 FT                   /replace="a"
 FT                   /gene="N"
 FT   CDS             5157..5900
@@ -670,8 +671,8 @@ FT                   PDGAVIEYEPETSALTVSGIKTASVTASGSVTATVPVVMVKASTRVTLDTPEVVCTNRL
 FT                   ITGTLEVQKGGTMRGNIEHTGGELSSNGKVLHTHKHPGDSGGTTGSPL"
 FT   misc_feature    10308
 FT                   /evidence=EXPERIMENTAL
-FT                   /note="transcription start site for V operon; encodes V, W,
-FT                   J, I, H, G"
+FT                   /note="transcription start site for V operon; encodes V,
+FT                   W, J, I, H, G"
 FT                   /gene="V"
 FT   variation       10329
 FT                   /note="Vam42, Vam203 and Vam205 mutations"
@@ -871,6 +872,16 @@ FT                   /translation="MAMPRKLKLMNVFLNGYSYQGVAKSVTLPKLTRKLENYRGAGMNG
 FT                   SAPVDLGLDDDALSMEWSLGGFPDSVIWELYAATGVDAVPIRFAGSYQRDDTGETVAVE
 FT                   VVMRGRQKEIDTGEGKQGEDTESKISVVCTYFRLTMDGKELVEIDTINMIEKVNGVDRL
 FT                   EQHRRNIGL"
+FT   CDS             19430..19705
+FT                   /codon_start=1
+FT                   /db_xref="SPTREMBL:O64313"
+FT                   /transl_table=11
+FT                   /gene="E"
+FT                   /function="essential tail protein"
+FT                   /product="gpE"
+FT                   /protein_id="AAD03291.1"
+FT                   /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE
+FT                   VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLSPNSVQ"
 FT   CDS             join(19430..19684,19684..19857)
 FT                   /codon_start=1
 FT                   /db_xref="SPTREMBL:O64312"
@@ -883,16 +894,6 @@ FT                   /protein_id="AAD03292.1"
 FT                   /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE
 FT                   VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLVAELGAVTFPKNLSVDDLM
 FT                   ADVAVIFHWPPSELYPMSLTELITWREKALRRSGNTNE"
-FT   CDS             19430..19705
-FT                   /codon_start=1
-FT                   /db_xref="SPTREMBL:O64313"
-FT                   /transl_table=11
-FT                   /gene="E"
-FT                   /function="essential tail protein"
-FT                   /product="gpE"
-FT                   /protein_id="AAD03291.1"
-FT                   /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE
-FT                   VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLSPNSVQ"
 FT   variation       19473
 FT                   /note="Ets55 mutation"
 FT                   /replace="a"
@@ -1018,11 +1019,11 @@ FT                   /note="saf mutation; altered integration site specificity"
 FT                   /replace="g"
 FT   variation       24376
 FT                   /note="left end del3 deletion/insertion"
-FT   variation       24378..25927
-FT                   /note="vir22 deletion/insertion"
 FT   variation       24378..25065
 FT                   /note="del6 deletion"
 FT                   /replace=""
+FT   variation       24378..25927
+FT                   /note="vir22 deletion/insertion"
 FT   variation       24381..26340
 FT                   /note="vir94 deletion/insertion"
 FT   protein_bind    24399..24472
@@ -1141,9 +1142,9 @@ FT                   /codon_start=1
 FT                   /db_xref="SWISS-PROT:P07695"
 FT                   /transl_table=11
 FT                   /gene="cox"
-FT                   /function="repressor of Pc; required for prophage excision;
-FT                   inhibits integration; activates P11 promoter of satellite
-FT                   phage P4"
+FT                   /function="repressor of Pc; required for prophage
+FT                   excision; inhibits integration; activates P11 promoter of
+FT                   satellite phage P4"
 FT                   /product="Cox"
 FT                   /protein_id="AAD03299.1"
 FT                   /translation="MSKQVTLMTDAIPYQEFAKLIGKSTGAVRRMIDKGKLPVIDMTDP
@@ -1174,8 +1175,8 @@ FT                   /codon_start=1
 FT                   /db_xref="SWISS-PROT:P07696"
 FT                   /transl_table=11
 FT                   /gene="B"
-FT                   /function="essential protein; DNA replication; required for
-FT                   lagging strand synthesis"
+FT                   /function="essential protein; DNA replication; required
+FT                   for lagging strand synthesis"
 FT                   /product="gpB"
 FT                   /protein_id="AAD03301.1"
 FT                   /translation="MTVMTLNLVEKQPAAMRRIIGKHLAVPRWQDTCDYYNQMMERERL
@@ -1338,7 +1339,6 @@ FT   misc_feature    complement(33426)
 FT                   /note="transcript start 2; old-tin mRNA"
 FT   misc_feature    33575..33593
 FT                   /note="cosR; right cohesive end"
-XX
 SQ   Sequence 33593 BP; 8398 A; 7983 C; 8870 G; 8342 T; 0 other;
      ggcgaggcgg ggaaagcact gcgcgctgac ggtggtgctg attgtatttt ttcagcgtct        60
      cagcgcgtcg tgacggcact tagtctgccc gttgaggcgt tgtgtgtctg cggggtgttt       120
diff --git a/etc/b132222.embl b/etc/b132222.embl
index 4e4dcccab..8dc1ddd84 100644
--- a/etc/b132222.embl
+++ b/etc/b132222.embl
@@ -137,6 +137,7 @@ XX
 CC   On Jul 23, 1998 this sequence version replaced gi:14895 gi:15549
 CC   gi:1262241 gi:974436.
 XX
+XX
 FH   Key             Location/Qualifiers
 FH
 FT   source          1..30624
@@ -565,13 +566,13 @@ FT                   /evidence=EXPERIMENTAL
 FT                   /note="two CI binding sites for repression of pB"
 FT                   /gene="B"
 FT                   /bound_moiety="CI"
+FT   -35_signal      20549..20554
+FT                   /gene="B"
 FT   promoter        20549..20584
 FT                   /evidence=EXPERIMENTAL
 FT                   /note="pB; repressed by CI"
 FT                   /gene="B"
 FT                   /function="transcription of B gene"
-FT   -35_signal      20549..20554
-FT                   /gene="B"
 FT   -10_signal      20572..20577
 FT                   /gene="B"
 FT   CDS             20609..20827
@@ -611,8 +612,8 @@ FT   tRNA            20985..21030
 FT                   /note="truncated"
 FT                   /product="tRNA-Ile"
 FT   misc_feature    20995..21030
-FT                   /note="in common with attB in Escherichia coli, which is in
-FT                   the 3' end of ileY"
+FT                   /note="in common with attB in Escherichia coli, which is
+FT                   in the 3' end of ileY"
 FT   misc_feature    21023..21029
 FT                   /note="attP; predicted crossover region with attB"
 FT   variation       21030..22865
@@ -732,10 +733,6 @@ FT                   /note="FR"
 FT                   /bound_moiety="CI"
 FT                   /function="probably responsible for CI repression of pE
 FT                   promoter"
-FT   protein_bind    23435..23462
-FT                   /gene="cII"
-FT                   /bound_moiety="CII"
-FT                   /function="activate pE"
 FT   CDS             23435..23944
 FT                   /codon_start=1
 FT                   /db_xref="SWISS-PROT:P21678"
@@ -752,6 +749,10 @@ FT                   /translation="MFDFQVSKHPHYDEACRAFAQRHNMAKLAERAGMNVQTLRNKLNP
 FT                   EQPHQFTPPELWLLTDLTEDSTLVDGFLAQIHCLPCVPVNELAKDKLQSYVMRAMSELG
 FT                   ELASGAVSDERLTTARKHNMIESVNSGIRMLSLSALALHARLQTNPAMSSVVDTMSGIG
 FT                   ASFGLI"
+FT   protein_bind    23435..23462
+FT                   /gene="cII"
+FT                   /bound_moiety="CII"
+FT                   /function="activate pE"
 FT   variation       23440
 FT                   /note="prevents CII binding to pE; cIV476"
 FT                   /replace="a"
@@ -790,8 +791,8 @@ FT                   /note="CP78am; dhram"
 FT                   /replace="a"
 FT                   /gene="dhr"
 FT   terminator      24384..24412
-FT                   /note="tR1; one of a number of transcription terminators in
-FT                   this region that are anti-terminated during early lytic
+FT                   /note="tR1; one of a number of transcription terminators
+FT                   in this region that are anti-terminated during early lytic
 FT                   development by an unknown mechanism"
 FT   CDS             24433..24666
 FT                   /codon_start=1
@@ -996,7 +997,6 @@ FT                   /replace=""
 FT                   /gene="orf97"
 FT   terminator      30436..30463
 FT                   /note="t95b; p95 transcript terminator"
-XX
 SQ   Sequence 30624 BP; 7461 A; 7801 C; 8456 G; 6906 T; 0 other;
      ggcgtggcgg ggaaagcatt gcgcgccaga ggtggcgcgt gaatgataaa aattatcgtc        60
      tgagcgcctc gtaatggcgc tatcgtggtg ctgttggttc gttggtggtc gtgtgtgttt       120
diff --git a/etc/c1215.genbank b/etc/c1215.genbank
index 9e3c3d873..6ba609013 100644
--- a/etc/c1215.genbank
+++ b/etc/c1215.genbank
@@ -63,8 +63,6 @@ FEATURES             Location/Qualifiers
      rRNA            415..581
                      /note="SPRG5SD K00771 Yeast (s.pombe) 5s rrna gene and
                      flanks"
-     gene            join(1501..1760,1844..2456)
-                     /gene="SPBC1215.01"
      CDS             join(1501..1760,1844..2456)
                      /gene="SPBC1215.01"
                      /note="SPBC1215.01, len:290, SIMILARITY:Saccharomyces
@@ -82,17 +80,15 @@ FEATURES             Location/Qualifiers
                      PKGPVVIEGLLRQHTDKPRFMMKNEPEKNSFYFLNVREFAQLKGTLPILITELQPSLT
                      PLQEADHVKRGLPLGHPLKVEIFNSHTEYIITWYSLSVVSAIMLYVYFKRGSGTSSLN
                      SAYERSKILNNKRL"
+     gene            join(1501..1760,1844..2456)
+                     /gene="SPBC1215.01"
      misc_feature    1761..1766
                      /gene="SPBC1215.01"
                      /note="gtacgt, splice donor sequence"
      misc_feature    1827..1843
                      /gene="SPBC1215.01"
                      /note="ctaacataatcacttag, splice branch and acceptor"
-     gene            complement(join(2495..2602,2663..2881,2953..4071,
-                     4111..4372,4413..4849,4889..5007,5251..5389,5434..5466))
-                     /gene="SPBC1215.02c"
-     CDS             complement(join(2495..2602,2663..2881,2953..4071,
-                     4111..4372,4413..4849,4889..5007,5251..5389,5434..5466))
+     CDS             complement(join(2495..2602,2663..2881,2953..4071,4111..4372,4413..4849,4889..5007,5251..5389,5434..5466))
                      /gene="SPBC1215.02c"
                      /note="SPBC1215.02c, len:811, SIMILARITY:Saccharomyces
                      cerevisiae, DEC1_YEAST, dec1 protein, (796 aa), fasta
@@ -118,6 +114,8 @@ FEATURES             Location/Qualifiers
                      LIESFDYENSTPLSQLTKYTEIINDLITCLNSFLYHVSATKKKEFTRQYQLLKNISSN
                      KLGSISGITKHKKKAARKYVSELLSNSWLSNLSETQVPYDPKFAKQVGEGMIDSYIQT
                      TDAVSKLPKFVKF"
+     gene            complement(join(2495..2602,2663..2881,2953..4071,4111..4372,4413..4849,4889..5007,5251..5389,5434..5466))
+                     /gene="SPBC1215.02c"
      misc_feature    complement(2603..2612)
                      /gene="SPBC1215.02c"
                      /note="ctaataatag, splice branch and acceptor"
@@ -163,8 +161,8 @@ FEATURES             Location/Qualifiers
      misc_feature    6387..6490
                      /note="nominal overlap with cosmid SPBC83 S. pombe
                      chromosome 2"
-BASE COUNT     2097 a   1138 c   1138 g   2117 t
-ORIGIN      
+BASE COUNT        0 a      0 c      0 g      0 t
+ORIGIN
         1 tatatataat ttaataaata cattccgacg atactgcctc tatggcttag tggtacagca
        61 tcgcacttgt aatgcgaaga tccttggttc gattccgagt ggaggcatat acattatatt
       121 atattctttt tcatgcggaa aaaagatttc aaatttttgg gtatgatatt aatatgactg
@@ -273,5 +271,5 @@ ORIGIN
      6301 gattattgtt gaaaagcgca gacgaagttt agagaattac tagcgtattt taaatttaat
      6361 caacggacta ttttttattc ctttgagatc cgactttatc gctttgcttc taattttcca
      6421 aaattcagtc tatctacgcg atccagccct gtttgcgtaa atttcatatt atttttcttt
-     6481 aaacgtttgg 
+     6481 aaacgtttgg
 //
diff --git a/etc/options b/etc/options
index 1c447a161..22498c000 100644
--- a/etc/options
+++ b/etc/options
@@ -2,7 +2,7 @@
 
 # (Note that comment lines start with a hash (#) symbol)
 
-# $Header: //tmp/pathsoft/artemis/etc/options,v 1.21 2005-07-11 08:52:08 tjc Exp $
+# $Header: //tmp/pathsoft/artemis/etc/options,v 1.22 2006-06-21 10:11:36 tjc Exp $
 
 # This file should contain option settings that look like this:
 #
@@ -496,10 +496,13 @@ extra_qualifiers = \
     obsolete_product "text" \
     origid "text" \
     ortholog "text" \
+    other_transcript  "text" \
     paralog "text" \
     pepstats_file "text" \
     percent_id text \
     pfam_match "text" \
+    previous_other_transcript "text" \
+    previous_shared_id "text" \
     previous_systematic_id "text" \
     primary_name "text" \
     prosite_match "text" \
@@ -510,6 +513,7 @@ extra_qualifiers = \
     score text \
     sequence_source "text" \
     sequence_status "text" \
+    shared_id "text" \
     sigcleave_file "text" \
     signal "text" \
     similarity "text" \
diff --git a/etc/run_blastp b/etc/run_blastp
index 9b557691b..2ee1b2a1b 100755
--- a/etc/run_blastp
+++ b/etc/run_blastp
@@ -6,7 +6,7 @@
 # to customise this script see the function called run_one_prog below
 
 
-RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.4 2005-12-20 11:50:59 tjc Exp $"
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.5 2006-06-21 10:11:36 tjc Exp $"
 
 PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'`
 
@@ -21,17 +21,17 @@ else
     then
         DATABASE=$2 export DATABASE
     else
-        echo usage: $0 -onefile input_file output_file database
-        echo    or: $0 file_of_filenames database
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo    or: $0 file_of_filenames database 1>&2
         exit 1
     fi
 fi
 
 
+
 # expand any ~ or environment variables
 EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
 
-
 ### change this function to suit your site:
 
 run_one_prog () {
@@ -41,18 +41,7 @@ run_one_prog () {
 
 
     ### change these lines:
-    EXEC=${EXEC-`which blastall 2>/dev/null`}
-
-    if [ ! -x "$EXEC" ]; then
-      EXEC=`find Artemis* -name blastall 2>/dev/null`
-    fi
-
-    if [ ! -d "$BLASTDB" ]; then
-      DATABASE_TMP="$PWD/"`find Artemis* -name blast-data 2>/dev/null`"/$DATABASE"
-      if [ -f "$DATABASE_TMP" ]; then
-        DATABASE="$DATABASE_TMP"
-      fi
-    fi
+    EXEC=blastall
 
     echo "about to start $EXEC with input from $INPUT_FILE and output to"
     echo "$OUTPUT_FILE using database $DATABASE"
@@ -60,10 +49,30 @@ run_one_prog () {
 
     EXTRA_ARGS=
 
+    HOSTNAME=`hostname`
+    REMOTE=N
+
+    case $HOSTNAME in
+        deskpro*)
+          REMOTE=Y ;;
+        *)
+    esac
+
+    if [ $REMOTE = "Y" ]; then
+      WDIR=`pwd`
+      export WDIR
+#      rsh babel "cd $WDIR; lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \
+#                           blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \
+#                           $EXTRA_ARGS >! $OUTPUT_FILE"
+      ssh babel "cd $WDIR; bsub -q  longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS"
+    else
     # add/change the flags to suit your site:
-    nice -19 $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \
-      $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | 
-      tee ${PROG}_errors.new 1>&2
+#      lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \
+#        blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \
+#        $EXTRA_ARGS 2>&1 > $OUTPUT_FILE |
+#        tee ${PROG}_errors.new 1>&2
+       bsub -q  longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS
+    fi
 
     #### end of changes
 
@@ -73,24 +82,337 @@ run_one_prog () {
 
     if [ -s ${PROG}_errors.new ]
     then
-        ( echo ERROR running $PROG: ; echo; 
+        ( echo ERROR running $PROG: ; echo;
           echo ===================================================
           cat ${PROG}_errors.new ) >> $OUTPUT_FILE
         cat ${PROG}_errors.new >> ${PROG}_errors
     fi
 }
 
+PERL_PROG_1='
+
+local *BSUB;
+
+my $file     = $ARGV[0];
+my $database = $ARGV[1];
+my $pwd      = $ARGV[2];
+chomp $file;
+chomp $database;
+
+open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!";
+open(LIST_FILE,$file);
+
+$EXEC="blastall";
+
+while(my $inFile = <LIST_FILE>)
+{
+  chomp($inFile);
+
+  if($inFile =~ m/^($pwd)(.*)/)
+  {
+    my $inFile_tmp = $2;
+    while($inFile_tmp =~ m/^(\/)(.*)/)
+    {
+      $inFile_tmp = $2;
+    }
+
+    if( -e $inFile_tmp )
+    {
+      $inFile = $inFile_tmp;
+    }
+  }
+ 
+  if($inFile =~ m/^(\S{100})/)
+  {
+    if($inFile =~ m/^(\S{90,})(blastp\/\S+)/)
+    {
+      my $inFile_tmp = $1;
+
+      if( -e $inFile_tmp )
+      {
+        print BSUB "cd $inFile_tmp\n";
+        $inFile = $2;
+      }
+    }
+  }
+
+  print BSUB "flexi_blast.pl -p $database $inFile $database ktup 2 > $inFile\.out\n";
+  print BSUB "gzip -9 $inFile\.out\n";
+}
+close BSUB or die "--Could not submit job : $!";
+close LIST_FILE;
+'
+
+
+
+PERL_PROG='
+
+local *BSUB;
+
+my $file     = $ARGV[0];
+my $database = $ARGV[1];
+my $pwd      = $ARGV[2];
+chomp $file;
+chomp $database;
+
+open(LIST_FILE,$file);
+
+$NEW_WDIR=".";
+$NUM_JOBS=0;
+
+while(my $inFile = <LIST_FILE>)
+{ 
+  $NUM_JOBS++;
+  chomp($inFile);
+
+  if($inFile =~ m/^($pwd)(.*)/)
+  {
+    my $inFile_tmp = $2;
+    while($inFile_tmp =~ m/^(\/)(.*)/)
+    {
+      $inFile_tmp = $2;
+    }
+
+    if( -e $inFile_tmp )
+    {
+      $inFile = $inFile_tmp;
+    }
+  }
+ 
+
+  if($inFile =~ m/^(\S{100})/)
+  {
+    if($inFile =~ m/^(\S{90,})(blastp\/\S+)/)
+    {
+      my $inFile_tmp = $1;
+
+      if( -e $inFile_tmp )
+      {
+        $NEW_WDIR=$inFile_tmp;
+        $inFile = $2;
+      }
+    }
+  }
+  # group the input files by the number of leading zeros in their numeric suffix
+
+  if($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{4})(\d{1})$/)
+  {
+    push(@jobs10, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{3})(\d{2})$/)
+  {
+    push(@jobs100, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{2})(\d{3})$/)
+  {
+    push(@jobs1000, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{1})(\d{4})$/)
+  {
+    push(@jobs10000, $inFile);
+  }
+  else
+  {
+    push(@jobs100000, $inFile);
+  }
+}
+close LIST_FILE;
+
+if(@jobs10)   # plain truth test = non-empty; defined(@array) is a fatal error since Perl 5.22
+{
+  my( $num ) = sprintf( "%04d", 0);   # zeros placed between the prefix and the LSF job index
+  submit($num, @jobs10);
+}
+
+if(@jobs100)
+{
+  my( $num ) = sprintf( "%03d", 0);
+  submit($num, @jobs100);
+}
+
+if(@jobs1000)
+{
+  my( $num ) = sprintf( "%02d", 0);
+  submit($num, @jobs1000);
+}
+
+if(@jobs10000)
+{
+  my( $num ) = sprintf( "%01d", 0);
+  submit($num, @jobs10000);
+}
+
+if(@jobs100000)   # catch-all group: submitted with no zero padding
+{
+  submit("", @jobs100000);
+}
+
+for($count = 0; $count < @bsub_jobs; $count++)
+{
+  open(BSUB, "| bsub -q normal -R \"select[mem > 1] rusage[mem=1]\" -o /dev/null -e /dev/null -w \"ended($bsub_jobs[$count])\" -K -J \"$bsub_jobs[$count]\_fin\"")  or die "could not open bsub pipe  : $!";
+  print BSUB "\"echo finished > /dev/null\" > /dev/null 2> /dev/null";
+  close BSUB; # or die "--Could not submit job : $!";
+} 
+ 
+
+sub submit
+{
+  my ($num, @jobs) = @_;
+   
+  my $prefix;
+  my @starts;
+  my @prefixes;
+
+  if($jobs[0] =~ m/^(.*)(\/blastp\/)([^\/]*)(\d{5})/)
+  {
+    $prefix = $1.$2.$3;
+    push(@starts, "$4");
+  }
+  elsif($jobs[0] =~ m/^(blastp\/)([^\/]*)(\d{5})/)
+  {
+    $prefix = $1.$2;
+    push(@starts, "$3");
+  }
+
+  # escape characters
+  if($prefix =~ /(\+|\?|\*|\[|\])/)
+  {
+    $prefix =~ s/\+/\\+/;
+    $prefix =~ s/\?/\\?/;
+    $prefix =~ s/\*/\\*/;
+    $prefix =~ s/\[/\\[/;
+    $prefix =~ s/\]/\\]/;
+  }
+  push(@prefixes, $prefix);
+
+  
+  # entries may have different prefixes; record each new prefix as it is encountered
+  for($count =0; $count < @jobs; $count++)
+  {
+    if($jobs[$count] !~ m/^$prefix(.*)/)
+    {
+      if($jobs[$count] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/)
+      {
+        $prefix = $1.$2.$3;
+
+        push(@starts, "$4");
+
+        if($prefix =~ /(\+|\?|\*|\[|\])/)
+        {
+          $prefix =~ s/\+/\\+/;
+          $prefix =~ s/\?/\\?/;
+          $prefix =~ s/\*/\\*/;
+          $prefix =~ s/\[/\\[/;
+          $prefix =~ s/\]/\\]/;
+        }
+
+        push(@prefixes, $prefix);
+      }
+    }
+  }
+
+  for($count =0; $count < @prefixes; $count++)
+  {
+    $prefix = $prefixes[$count];
+    $start  = $starts[$count];
+
+    # build the comma-separated list of index ranges that needs to be run
+    $index="$start-";
+    $end="$start";
+    for($j =0; $j < @jobs; $j++)
+    {
+      if($jobs[$j] =~ m/^$prefix(.*)/)
+      {
+        if($jobs[$j] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/)
+        {
+          if($j == @jobs-1)
+          {
+            $index = "$index$4";
+          }
+          elsif($end+1 >= $4)
+          {
+            $end = "$4";
+          }
+          else
+          {
+            $index = "$index$end,$4-"
+          }
+        }
+      }
+    }
+
+    if($index =~ m/(\-)$/)
+    {
+      $index = "$index$end";
+    }
+
+    print "$prefix, $index, $num";
+    bsub($prefix, $index, $num);
+  }
+}
+
+
+# start job arrays
+sub bsub
+{
+  my ($prefix, $index, $num) = @_;
+
+  my $name = $prefix;
+ 
+  if($prefix =~ m/(\/blastp\/)(.*)/)
+  {
+    $name = "$2";
+  }
+
+  my $random = int( rand( 999+1 ) );
+  
+  push(@bsub_jobs, "$name$random\_blastp");
+
+  my $QUEUE="longblastq";
+  if($NUM_JOBS <= 6)
+  {
+    $QUEUE="normal";
+  }
+
+  print "flexi_blast.pl -p $database $prefix$num\n";
+
+  open(BSUB, "| bsub -q $QUEUE -o /dev/null -n 1 -R \"select[blast && mem > 500] rusage[mem=500]\" -J$name$random\_blastp\"[$index]%16\"")  or die "could not open bsub pipe  : $!";
+  print BSUB "cd $NEW_WDIR\n";
+  print BSUB "flexi_blast.pl -p $database $prefix$num";
+  print BSUB "\${LSB_JOBINDEX} > $prefix$num";
+  print BSUB "\${LSB_JOBINDEX}\.out\n";
+
+  print BSUB "gzip -9f $prefix$num";
+  print BSUB "\${LSB_JOBINDEX}\.out\n";
+
+ close BSUB or die "--Could not submit job : $!";
+}
+
+'
+
 (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill
 
 chmod a+x $PROG.kill
 
+HOSTNAME=`hostname`
+REMOTE=N
+
+case $HOSTNAME in
+    deskpro*)
+      REMOTE=Y ;;
+    *)
+esac
+
 if [ x$ONEFILE = x ]
 then
-    for i in `cat $1`
-    do
-        run_one_prog $i $i.out $EXPANDED_DATABASE
-    done
-
+   if [ $REMOTE = "Y" ]; then
+      WDIR=`pwd`
+      export WDIR
+      CMD=`echo $PERL_PROG_1`
+      ssh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\""
+   else
+      perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD"
+   fi
 else
     run_one_prog $1 $2 $EXPANDED_DATABASE
 fi
diff --git a/etc/run_fasta b/etc/run_fasta
index 13f597b31..286300de9 100755
--- a/etc/run_fasta
+++ b/etc/run_fasta
@@ -6,7 +6,7 @@
 # to customise this script see the function called run_one_prog below
 
 
-RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fasta,v 1.14 2005-12-20 13:44:18 tjc Exp $"
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fasta,v 1.15 2006-06-21 10:11:36 tjc Exp $"
 
 PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'`
 
@@ -41,20 +41,9 @@ run_one_prog () {
 
 
     ### change these lines:
-    EXEC=${EXEC-`which fasta34 2>/dev/null`}
 
-    if [ ! -x "$EXEC" ]; then
-      EXEC=`find Artemis* -name fasta34 2>/dev/null`
-    fi
- 
-    if [ ! -f "$FASTLIBS" ]; then
-      DATABASE_TMP=`echo $DATABASE | sed 's|\%||'`
-      DATABASE_TMP="$PWD/"`find Artemis* -name blast-data 2>/dev/null`"/$DATABASE_TMP"
-      if [ -f "$DATABASE_TMP" ]; then
-        DATABASE="$DATABASE_TMP"
-      fi
-    fi
-   
+    EXEC=fasta34
+
     echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2
     echo "$OUTPUT_FILE using database $DATABASE" 1>&2
 
-- 
GitLab