From 064ffdbc956cbde95424cc7d0853d78773f07b51 Mon Sep 17 00:00:00 2001 From: tjc <tjc@ee4ac58c-ac51-4696-9907-e4b3aa274f04> Date: Wed, 21 Jun 2006 10:11:36 +0000 Subject: [PATCH] add extra_qualifiers git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@4445 ee4ac58c-ac51-4696-9907-e4b3aa274f04 --- etc/af063097.embl | 64 ++++---- etc/b132222.embl | 22 +-- etc/c1215.genbank | 18 +-- etc/options | 6 +- etc/run_blastp | 372 ++++++++++++++++++++++++++++++++++++++++++---- etc/run_fasta | 17 +-- 6 files changed, 406 insertions(+), 93 deletions(-) diff --git a/etc/af063097.embl b/etc/af063097.embl index 99cf9f44a..909837042 100644 --- a/etc/af063097.embl +++ b/etc/af063097.embl @@ -315,6 +315,7 @@ CC gi:1806573 gi:215666 gi:506836 gi:508561 gi:1061043 gi:215669 CC gi:1769975 gi:494974 gi:215664 gi:15620 gi:393451 gi:1769977 CC gi:215681 gi:215675 gi:215676 gi:15141 gi:414530. XX +XX FH Key Location/Qualifiers FH FT source 1..33593 @@ -382,8 +383,8 @@ FT /evidence=EXPERIMENTAL FT /note="activator binding site; pO, pP promoters" FT misc_feature 3127 FT /evidence=EXPERIMENTAL -FT /note="transcription start site for O operon; encodes O, N, -FT M, L, X, Y, K, lysA, lysB, R, S" +FT /note="transcription start site for O operon; encodes O, +FT N, M, L, X, Y, K, lysA, lysB, R, S" FT CDS 3167..4021 FT /codon_start=1 FT /db_xref="SWISS-PROT:P25478" @@ -430,18 +431,18 @@ FT /note="Nam209 mutation" FT /replace="at" FT /gene="N" FT variation 4630 -FT /note="sir3 mutation; defective for choice of small capsids -FT by satellite phage P4" +FT /note="sir3 mutation; defective for choice of small +FT capsids by satellite phage P4" FT /replace="c" FT /gene="N" FT variation 4695 -FT /note="sir5 mutation; defective for choice of small capsids -FT by satellite phage P4" +FT /note="sir5 mutation; defective for choice of small +FT capsids by satellite phage P4" FT /replace="t" FT /gene="N" FT variation 4695..4697 -FT /note="sir9 mutation; defective for choice of small capsids -FT by satellite phage P4" +FT /note="sir9 mutation; defective for choice of small +FT capsids by satellite phage P4" FT /replace="" FT /gene="N" FT variation 4699 @@ -450,13 +451,13 @@ FT of small capsids by satellite phage P4" FT /replace="t" FT /gene="N" FT variation 4740 -FT /note="sir1 mutation; defective for choice of small capsids -FT by satellite phage P4" +FT /note="sir1 mutation; defective for choice of small +FT capsids by satellite phage P4" FT /replace="g" FT /gene="N" FT variation 4741 -FT /note="sir4 mutation; defective for choice of small capsids -FT by satellite phage P4" +FT /note="sir4 mutation; defective for choice of small +FT capsids by satellite phage P4" FT /replace="a" FT /gene="N" FT CDS 5157..5900 @@ -670,8 +671,8 @@ FT PDGAVIEYEPETSALTVSGIKTASVTASGSVTATVPVVMVKASTRVTLDTPEVVCTNRL FT ITGTLEVQKGGTMRGNIEHTGGELSSNGKVLHTHKHPGDSGGTTGSPL" FT misc_feature 10308 FT /evidence=EXPERIMENTAL -FT /note="transcription start site for V operon; encodes V, W, -FT J, I, H, G" +FT /note="transcription start site for V operon; encodes V, +FT W, J, I, H, G" FT /gene="V" FT variation 10329 FT /note="Vam42, Vam203 and Vam205 mutations" @@ -871,6 +872,16 @@ FT /translation="MAMPRKLKLMNVFLNGYSYQGVAKSVTLPKLTRKLENYRGAGMNG FT SAPVDLGLDDDALSMEWSLGGFPDSVIWELYAATGVDAVPIRFAGSYQRDDTGETVAVE FT VVMRGRQKEIDTGEGKQGEDTESKISVVCTYFRLTMDGKELVEIDTINMIEKVNGVDRL FT EQHRRNIGL" +FT CDS 19430..19705 +FT /codon_start=1 +FT /db_xref="SPTREMBL:O64313" +FT /transl_table=11 +FT /gene="E" +FT /function="essential tail protein" +FT /product="gpE" +FT /protein_id="AAD03291.1" +FT /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE +FT VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLSPNSVQ" FT CDS join(19430..19684,19684..19857) FT /codon_start=1 FT /db_xref="SPTREMBL:O64312" @@ -883,16 +894,6 @@ FT /protein_id="AAD03292.1" FT /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE FT VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLVAELGAVTFPKNLSVDDLM FT ADVAVIFHWPPSELYPMSLTELITWREKALRRSGNTNE" -FT CDS 19430..19705 -FT /codon_start=1 -FT /db_xref="SPTREMBL:O64313" -FT /transl_table=11 -FT /gene="E" -FT /function="essential tail protein" -FT /product="gpE" -FT /protein_id="AAD03291.1" -FT /translation="MNKENVITLDNPVKRGEQVIEQVTLMKPSAGTLRGVSLAAVANSE -FT VDALIKVLPRMTAPMLTEQEVAALELPDLVALAGKVVGFLSPNSVQ" FT variation 19473 FT /note="Ets55 mutation" FT /replace="a" @@ -1018,11 +1019,11 @@ FT /note="saf mutation; altered integration site specificity" FT /replace="g" FT variation 24376 FT /note="left end del3 deletion/insertion" -FT variation 24378..25927 -FT /note="vir22 deletion/insertion" FT variation 24378..25065 FT /note="del6 deletion" FT /replace="" +FT variation 24378..25927 +FT /note="vir22 deletion/insertion" FT variation 24381..26340 FT /note="vir94 deletion/insertion" FT protein_bind 24399..24472 @@ -1141,9 +1142,9 @@ FT /codon_start=1 FT /db_xref="SWISS-PROT:P07695" FT /transl_table=11 FT /gene="cox" -FT /function="repressor of Pc; required for prophage excision; -FT inhibits integration; activates P11 promoter of satellite -FT phage P4" +FT /function="repressor of Pc; required for prophage +FT excision; inhibits integration; activates P11 promoter of +FT satellite phage P4" FT /product="Cox" FT /protein_id="AAD03299.1" FT /translation="MSKQVTLMTDAIPYQEFAKLIGKSTGAVRRMIDKGKLPVIDMTDP @@ -1174,8 +1175,8 @@ FT /codon_start=1 FT /db_xref="SWISS-PROT:P07696" FT /transl_table=11 FT /gene="B" -FT /function="essential protein; DNA replication; required for -FT lagging strand synthesis" +FT /function="essential protein; DNA replication; required +FT for lagging strand synthesis" FT /product="gpB" FT /protein_id="AAD03301.1" FT /translation="MTVMTLNLVEKQPAAMRRIIGKHLAVPRWQDTCDYYNQMMERERL @@ -1338,7 +1339,6 @@ FT misc_feature complement(33426) FT /note="transcript start 2; old-tin mRNA" FT misc_feature 33575..33593 FT /note="cosR; right cohesive end" -XX SQ Sequence 33593 BP; 8398 A; 7983 C; 8870 G; 8342 T; 0 other; ggcgaggcgg ggaaagcact gcgcgctgac ggtggtgctg attgtatttt ttcagcgtct 60 cagcgcgtcg tgacggcact tagtctgccc gttgaggcgt tgtgtgtctg cggggtgttt 120 diff --git a/etc/b132222.embl b/etc/b132222.embl index 4e4dcccab..8dc1ddd84 100644 --- a/etc/b132222.embl +++ b/etc/b132222.embl @@ -137,6 +137,7 @@ XX CC On Jul 23, 1998 this sequence version replaced gi:14895 gi:15549 CC gi:1262241 gi:974436. XX +XX FH Key Location/Qualifiers FH FT source 1..30624 @@ -565,13 +566,13 @@ FT /evidence=EXPERIMENTAL FT /note="two CI binding sites for repression of pB" FT /gene="B" FT /bound_moiety="CI" +FT -35_signal 20549..20554 +FT /gene="B" FT promoter 20549..20584 FT /evidence=EXPERIMENTAL FT /note="pB; repressed by CI" FT /gene="B" FT /function="transcription of B gene" -FT -35_signal 20549..20554 -FT /gene="B" FT -10_signal 20572..20577 FT /gene="B" FT CDS 20609..20827 @@ -611,8 +612,8 @@ FT tRNA 20985..21030 FT /note="truncated" FT /product="tRNA-Ile" FT misc_feature 20995..21030 -FT /note="in common with attB in Escherichia coli, which is in -FT the 3' end of ileY" +FT /note="in common with attB in Escherichia coli, which is +FT in the 3' end of ileY" FT misc_feature 21023..21029 FT /note="attP; predicted crossover region with attB" FT variation 21030..22865 @@ -732,10 +733,6 @@ FT /note="FR" FT /bound_moiety="CI" FT /function="probably responsible for CI repression of pE FT promoter" -FT protein_bind 23435..23462 -FT /gene="cII" -FT /bound_moiety="CII" -FT /function="activate pE" FT CDS 23435..23944 FT /codon_start=1 FT /db_xref="SWISS-PROT:P21678" @@ -752,6 +749,10 @@ FT /translation="MFDFQVSKHPHYDEACRAFAQRHNMAKLAERAGMNVQTLRNKLNP FT EQPHQFTPPELWLLTDLTEDSTLVDGFLAQIHCLPCVPVNELAKDKLQSYVMRAMSELG FT ELASGAVSDERLTTARKHNMIESVNSGIRMLSLSALALHARLQTNPAMSSVVDTMSGIG FT ASFGLI" +FT protein_bind 23435..23462 +FT /gene="cII" +FT /bound_moiety="CII" +FT /function="activate pE" FT variation 23440 FT /note="prevents CII binding to pE; cIV476" FT /replace="a" @@ -790,8 +791,8 @@ FT /note="CP78am; dhram" FT /replace="a" FT /gene="dhr" FT terminator 24384..24412 -FT /note="tR1; one of a number of transcription terminators in -FT this region that are anti-terminated during early lytic +FT /note="tR1; one of a number of transcription terminators +FT in this region that are anti-terminated during early lytic FT development by an unknown mechanism" FT CDS 24433..24666 FT /codon_start=1 @@ -996,7 +997,6 @@ FT /replace="" FT /gene="orf97" FT terminator 30436..30463 FT /note="t95b; p95 transcript terminator" -XX SQ Sequence 30624 BP; 7461 A; 7801 C; 8456 G; 6906 T; 0 other; ggcgtggcgg ggaaagcatt gcgcgccaga ggtggcgcgt gaatgataaa aattatcgtc 60 tgagcgcctc gtaatggcgc tatcgtggtg ctgttggttc gttggtggtc gtgtgtgttt 120 diff --git a/etc/c1215.genbank b/etc/c1215.genbank index 9e3c3d873..6ba609013 100644 --- a/etc/c1215.genbank +++ b/etc/c1215.genbank @@ -63,8 +63,6 @@ FEATURES Location/Qualifiers rRNA 415..581 /note="SPRG5SD K00771 Yeast (s.pombe) 5s rrna gene and flanks" - gene join(1501..1760,1844..2456) - /gene="SPBC1215.01" CDS join(1501..1760,1844..2456) /gene="SPBC1215.01" /note="SPBC1215.01, len:290, SIMILARITY:Saccharomyces @@ -82,17 +80,15 @@ FEATURES Location/Qualifiers PKGPVVIEGLLRQHTDKPRFMMKNEPEKNSFYFLNVREFAQLKGTLPILITELQPSLT PLQEADHVKRGLPLGHPLKVEIFNSHTEYIITWYSLSVVSAIMLYVYFKRGSGTSSLN SAYERSKILNNKRL" + gene join(1501..1760,1844..2456) + /gene="SPBC1215.01" misc_feature 1761..1766 /gene="SPBC1215.01" /note="gtacgt, splice donor sequence" misc_feature 1827..1843 /gene="SPBC1215.01" /note="ctaacataatcacttag, splice branch and acceptor" - gene complement(join(2495..2602,2663..2881,2953..4071, - 4111..4372,4413..4849,4889..5007,5251..5389,5434..5466)) - /gene="SPBC1215.02c" - CDS complement(join(2495..2602,2663..2881,2953..4071, - 4111..4372,4413..4849,4889..5007,5251..5389,5434..5466)) + CDS complement(join(2495..2602,2663..2881,2953..4071,4111..4372,4413..4849,4889..5007,5251..5389,5434..5466)) /gene="SPBC1215.02c" /note="SPBC1215.02c, len:811, SIMILARITY:Saccharomyces cerevisiae, DEC1_YEAST, dec1 protein, (796 aa), fasta @@ -118,6 +114,8 @@ FEATURES Location/Qualifiers LIESFDYENSTPLSQLTKYTEIINDLITCLNSFLYHVSATKKKEFTRQYQLLKNISSN KLGSISGITKHKKKAARKYVSELLSNSWLSNLSETQVPYDPKFAKQVGEGMIDSYIQT TDAVSKLPKFVKF" + gene complement(join(2495..2602,2663..2881,2953..4071,4111..4372,4413..4849,4889..5007,5251..5389,5434..5466)) + /gene="SPBC1215.02c" misc_feature complement(2603..2612) /gene="SPBC1215.02c" /note="ctaataatag, splice branch and acceptor" @@ -163,8 +161,8 @@ FEATURES Location/Qualifiers misc_feature 6387..6490 /note="nominal overlap with cosmid SPBC83 S. pombe chromosome 2" -BASE COUNT 2097 a 1138 c 1138 g 2117 t -ORIGIN +BASE COUNT 0 a 0 c 0 g 0 t +ORIGIN 1 tatatataat ttaataaata cattccgacg atactgcctc tatggcttag tggtacagca 61 tcgcacttgt aatgcgaaga tccttggttc gattccgagt ggaggcatat acattatatt 121 atattctttt tcatgcggaa aaaagatttc aaatttttgg gtatgatatt aatatgactg @@ -273,5 +271,5 @@ ORIGIN 6301 gattattgtt gaaaagcgca gacgaagttt agagaattac tagcgtattt taaatttaat 6361 caacggacta ttttttattc ctttgagatc cgactttatc gctttgcttc taattttcca 6421 aaattcagtc tatctacgcg atccagccct gtttgcgtaa atttcatatt atttttcttt - 6481 aaacgtttgg + 6481 aaacgtttgg // diff --git a/etc/options b/etc/options index 1c447a161..22498c000 100644 --- a/etc/options +++ b/etc/options @@ -2,7 +2,7 @@ # (Note that comment lines start with a hash (#) symbol) -# $Header: //tmp/pathsoft/artemis/etc/options,v 1.21 2005-07-11 08:52:08 tjc Exp $ +# $Header: //tmp/pathsoft/artemis/etc/options,v 1.22 2006-06-21 10:11:36 tjc Exp $ # This file should contain option settings that look like this: # @@ -496,10 +496,13 @@ extra_qualifiers = \ obsolete_product "text" \ origid "text" \ ortholog "text" \ + other_transcript "text" \ paralog "text" \ pepstats_file "text" \ percent_id text \ pfam_match "text" \ + previous_other_transcript "text" \ + previous_shared_id "text" \ previous_systematic_id "text" \ primary_name "text" \ prosite_match "text" \ @@ -510,6 +513,7 @@ extra_qualifiers = \ score text \ sequence_source "text" \ sequence_status "text" \ + shared_id "text" \ sigcleave_file "text" \ signal "text" \ similarity "text" \ diff --git a/etc/run_blastp b/etc/run_blastp index 9b557691b..2ee1b2a1b 100755 --- a/etc/run_blastp +++ b/etc/run_blastp @@ -6,7 +6,7 @@ # to customise this script see the function called run_one_prog below -RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.4 2005-12-20 11:50:59 tjc Exp $" +RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.5 2006-06-21 10:11:36 tjc Exp $" PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'` @@ -21,17 +21,17 @@ else then DATABASE=$2 export DATABASE else - echo usage: $0 -onefile input_file output_file database - echo or: $0 file_of_filenames database + echo usage: $0 -onefile input_file output_file database 1>&2 + echo or: $0 file_of_filenames database 1>&2 exit 1 fi fi + # expand any ~ or environment variables EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f` - ### change this function to suit your site: run_one_prog () { @@ -41,18 +41,7 @@ run_one_prog () { ### change these lines: - EXEC=${EXEC-`which blastall 2>/dev/null`} - - if [ ! -x "$EXEC" ]; then - EXEC=`find Artemis* -name blastall 2>/dev/null` - fi - - if [ ! -d "$BLASTDB" ]; then - DATABASE_TMP="$PWD/"`find Artemis* -name blast-data 2>/dev/null`"/$DATABASE" - if [ -f "$DATABASE_TMP" ]; then - DATABASE="$DATABASE_TMP" - fi - fi + EXEC=blastall echo "about to start $EXEC with input from $INPUT_FILE and output to" echo "$OUTPUT_FILE using database $DATABASE" @@ -60,10 +49,30 @@ run_one_prog () { EXTRA_ARGS= + HOSTNAME=`hostname` + REMOTE=N + + case $HOSTNAME in + deskpro*) + REMOTE=Y ;; + *) + esac + + if [ $REMOTE = "Y" ]; then + WDIR=`pwd` + export WDIR +# rsh babel "cd $WDIR; lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \ +# blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ +# $EXTRA_ARGS >! $OUTPUT_FILE" + ssh babel "cd $WDIR; bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS" + else # add/change the flags to suit your site: - nice -19 $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ - $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | - tee ${PROG}_errors.new 1>&2 +# lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \ +# blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ +# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | +# tee ${PROG}_errors.new 1>&2 + bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS + fi #### end of changes @@ -73,24 +82,337 @@ run_one_prog () { if [ -s ${PROG}_errors.new ] then - ( echo ERROR running $PROG: ; echo; + ( echo ERROR running $PROG: ; echo; echo =================================================== cat ${PROG}_errors.new ) >> $OUTPUT_FILE cat ${PROG}_errors.new >> ${PROG}_errors fi } +PERL_PROG_1=' + +local *BSUB; + +my $file = $ARGV[0]; +my $database = $ARGV[1]; +my $pwd = $ARGV[2]; +chomp $file; +chomp $database; + +open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!"; +open(LIST_FILE,$file); + +$EXEC="blastall"; + +while(my $inFile = <LIST_FILE>) +{ + chomp($inFile); + + if($inFile =~ m/^($pwd)(.*)/) + { + my $inFile_tmp = $2; + while($inFile_tmp =~ m/^(\/)(.*)/) + { + $inFile_tmp = $2; + } + + if( -e $inFile_tmp ) + { + $inFile = $inFile_tmp; + } + } + + if($inFile =~ m/^(\S{100})/) + { + if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) + { + my $inFile_tmp = $1; + + if( -e $inFile_tmp ) + { + print BSUB "cd $inFile_tmp\n"; + $inFile = $2; + } + } + } + + print BSUB "flexi_blast.pl -p $database $inFile $database ktup 2 > $inFile\.out\n"; + print BSUB "gzip -9 $inFile\.out\n"; +} +close BSUB or die "--Could not submit job : $!"; +close LIST_FILE; +' + + + +PERL_PROG=' + +local *BSUB; + +my $file = $ARGV[0]; +my $database = $ARGV[1]; +my $pwd = $ARGV[2]; +chomp $file; +chomp $database; + +open(LIST_FILE,$file); + +$NEW_WDIR="."; +$NUM_JOBS=0; + +while(my $inFile = <LIST_FILE>) +{ + $NUM_JOBS++; + chomp($inFile); + + if($inFile =~ m/^($pwd)(.*)/) + { + my $inFile_tmp = $2; + while($inFile_tmp =~ m/^(\/)(.*)/) + { + $inFile_tmp = $2; + } + + if( -e $inFile_tmp ) + { + $inFile = $inFile_tmp; + } + } + + + if($inFile =~ m/^(\S{100})/) + { + if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) + { + my $inFile_tmp = $1; + + if( -e $inFile_tmp ) + { + $NEW_WDIR=$inFile_tmp; + $inFile = $2; + } + } + } + # find number of leading zero + + if($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{4})(\d{1})$/) + { + push(@jobs10, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{3})(\d{2})$/) + { + push(@jobs100, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{2})(\d{3})$/) + { + push(@jobs1000, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{1})(\d{4})$/) + { + push(@jobs10000, $inFile); + } + else + { + push(@jobs100000, $inFile); + } +} +close LIST_FILE; + +if(defined @jobs10) +{ + my( $num ) = sprintf( "%04d", 0); + submit($num, @jobs10); +} + +if(defined @jobs100) +{ + my( $num ) = sprintf( "%03d", 0); + submit($num, @jobs100); +} + +if(defined @jobs1000) +{ + my( $num ) = sprintf( "%02d", 0); + submit($num, @jobs1000); +} + +if(defined @jobs10000) +{ + my( $num ) = sprintf( "%01d", 0); + submit($num, @jobs10000); +} + +if(defined @jobs100000) +{ + submit("", @jobs100000); +} + +for($count = 0; $count < @bsub_jobs; $count++) +{ + open(BSUB, "| bsub -q normal -R \"select[mem > 1] rusage[mem=1]\" -o /dev/null -e /dev/null -w \"ended($bsub_jobs[$count])\" -K -J \"$bsub_jobs[$count]\_fin\"") or die "could not open bsub pipe : $!"; + print BSUB "\"echo finished > /dev/null\" > /dev/null 2> /dev/null"; + close BSUB; # or die "--Could not submit job : $!"; +} + + +sub submit +{ + my ($num, @jobs) = @_; + + my $prefix; + my @starts; + my @prefixes; + + if($jobs[0] =~ m/^(.*)(\/blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2.$3; + push(@starts, "$4"); + } + elsif($jobs[0] =~ m/^(blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2; + push(@starts, "$3"); + } + + # escape characters + if($prefix =~ /(\+|\?|\*|\[|\])/) + { + $prefix =~ s/\+/\\+/; + $prefix =~ s/\?/\\?/; + $prefix =~ s/\*/\\*/; + $prefix =~ s/\[/\\[/; + $prefix =~ s/\]/\\]/; + } + push(@prefixes, $prefix); + + + #different entries have different prefixes + for($count =0; $count < @jobs; $count++) + { + if($jobs[$count] !~ m/^$prefix(.*)/) + { + if($jobs[$count] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2.$3; + + push(@starts, "$4"); + + if($prefix =~ /(\+|\?|\*|\[|\])/) + { + $prefix =~ s/\+/\\+/; + $prefix =~ s/\?/\\?/; + $prefix =~ s/\*/\\*/; + $prefix =~ s/\[/\\[/; + $prefix =~ s/\]/\\]/; + } + + push(@prefixes, $prefix); + } + } + } + + for($count =0; $count < @prefixes; $count++) + { + $prefix = $prefixes[$count]; + $start = $starts[$count]; + + # build the index description that need to be run + $index="$start-"; + $end="$start"; + for($j =0; $j < @jobs; $j++) + { + if($jobs[$j] =~ m/^$prefix(.*)/) + { + if($jobs[$j] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) + { + if($j == @jobs-1) + { + $index = "$index$4"; + } + elsif($end+1 >= $4) + { + $end = "$4"; + } + else + { + $index = "$index$end,$4-" + } + } + } + } + + if($index =~ m/(\-)$/) + { + $index = "$index$end"; + } + + print "$prefix, $index, $num"; + bsub($prefix, $index, $num); + } +} + + +# start job arrays +sub bsub +{ + my ($prefix, $index, $num) = @_; + + my $name = $prefix; + + if($prefix =~ m/(\/blastp\/)(.*)/) + { + $name = "$2"; + } + + my $random = int( rand( 999+1 ) ); + + push(@bsub_jobs, "$name$random\_blastp"); + + my $QUEUE="longblastq"; + if($NUM_JOBS <= 6) + { + $QUEUE="normal"; + } + + print "flexi_blast.pl -p $database $prefix$num\n"; + + open(BSUB, "| bsub -q $QUEUE -o /dev/null -n 1 -R \"select[blast && mem > 500] rusage[mem=500]\" -J$name$random\_blastp\"[$index]%16\"") or die "could not open bsub pipe : $!"; + print BSUB "cd $NEW_WDIR\n"; + print BSUB "flexi_blast.pl -p $database $prefix$num"; + print BSUB "\${LSB_JOBINDEX} > $prefix$num"; + print BSUB "\${LSB_JOBINDEX}\.out\n"; + + print BSUB "gzip -9f $prefix$num"; + print BSUB "\${LSB_JOBINDEX}\.out\n"; + + close BSUB or die "--Could not submit job : $!"; +} + +' + (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill chmod a+x $PROG.kill +HOSTNAME=`hostname` +REMOTE=N + +case $HOSTNAME in + deskpro*) + REMOTE=Y ;; + *) +esac + if [ x$ONEFILE = x ] then - for i in `cat $1` - do - run_one_prog $i $i.out $EXPANDED_DATABASE - done - + if [ $REMOTE = "Y" ]; then + WDIR=`pwd` + export WDIR + CMD=`echo $PERL_PROG_1` + ssh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\"" + else + perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD" + fi else run_one_prog $1 $2 $EXPANDED_DATABASE fi diff --git a/etc/run_fasta b/etc/run_fasta index 13f597b31..286300de9 100755 --- a/etc/run_fasta +++ b/etc/run_fasta @@ -6,7 +6,7 @@ # to customise this script see the function called run_one_prog below -RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fasta,v 1.14 2005-12-20 13:44:18 tjc Exp $" +RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fasta,v 1.15 2006-06-21 10:11:36 tjc Exp $" PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'` @@ -41,20 +41,9 @@ run_one_prog () { ### change these lines: - EXEC=${EXEC-`which fasta34 2>/dev/null`} - if [ ! -x "$EXEC" ]; then - EXEC=`find Artemis* -name fasta34 2>/dev/null` - fi - - if [ ! -f "$FASTLIBS" ]; then - DATABASE_TMP=`echo $DATABASE | sed 's|\%||'` - DATABASE_TMP="$PWD/"`find Artemis* -name blast-data 2>/dev/null`"/$DATABASE_TMP" - if [ -f "$DATABASE_TMP" ]; then - DATABASE="$DATABASE_TMP" - fi - fi - + EXEC=fasta34 + echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2 echo "$OUTPUT_FILE using database $DATABASE" 1>&2 -- GitLab