diff --git a/etc/run_blastp.sanger b/etc/run_blastp.sanger index cfb582455ba093ecd708f9bb293ba6c6b978ed5d..18bedcc9ad04775ac3e2ae58f00752ba491ab428 100755 --- a/etc/run_blastp.sanger +++ b/etc/run_blastp.sanger @@ -6,14 +6,10 @@ # to customise this script see the function called run_one_prog below -RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp.sanger,v 1.4 2005-06-15 19:54:00 tjc Exp $" +RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp.sanger,v 1.5 2007-02-27 10:56:30 tjc Exp $" PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'` -#BLASTDB=/data/blastdb -#BLASTMAT=/data/blastdb/aa -#export BLASTDB -#export BLASTMAT if [ $# = 4 -a x$1 = x-onefile ] then @@ -25,17 +21,17 @@ else then DATABASE=$2 export DATABASE else - echo usage: $0 -onefile input_file output_file database - echo or: $0 file_of_filenames database + echo usage: $0 -onefile input_file output_file database 1>&2 + echo or: $0 file_of_filenames database 1>&2 exit 1 fi fi + # expand any ~ or environment variables EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f` - ### change this function to suit your site: run_one_prog () { @@ -73,7 +69,7 @@ run_one_prog () { # add/change the flags to suit your site: # lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \ # blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ -# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | +# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | # tee ${PROG}_errors.new 1>&2 bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS fi @@ -86,26 +82,337 @@ run_one_prog () { if [ -s ${PROG}_errors.new ] then - ( echo ERROR running $PROG: ; echo; + ( echo ERROR running $PROG: ; echo; echo =================================================== cat ${PROG}_errors.new ) >> $OUTPUT_FILE cat ${PROG}_errors.new >> ${PROG}_errors fi } +PERL_PROG_1=' + +local *BSUB; + +my $file = $ARGV[0]; +my $database = $ARGV[1]; +my $pwd = $ARGV[2]; +chomp $file; +chomp $database; + +open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!"; +open(LIST_FILE,$file); + +$EXEC="blastall"; + +while(my $inFile = <LIST_FILE>) +{ + chomp($inFile); + + if($inFile =~ m/^($pwd)(.*)/) + { + my $inFile_tmp = $2; + while($inFile_tmp =~ m/^(\/)(.*)/) + { + $inFile_tmp = $2; + } + + if( -e $inFile_tmp ) + { + $inFile = $inFile_tmp; + } + } + + if($inFile =~ m/^(\S{100})/) + { + if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) + { + my $inFile_tmp = $1; + + if( -e $inFile_tmp ) + { + print BSUB "cd $inFile_tmp\n"; + $inFile = $2; + } + } + } + + print BSUB "flexi_blast.pl -p $database $inFile $database ktup 2 > $inFile\.out\n"; + print BSUB "gzip -9 $inFile\.out\n"; +} +close BSUB or die "--Could not submit job : $!"; +close LIST_FILE; +' + + + +PERL_PROG=' + +local *BSUB; + +my $file = $ARGV[0]; +my $database = $ARGV[1]; +my $pwd = $ARGV[2]; +chomp $file; +chomp $database; + +open(LIST_FILE,$file); + +$NEW_WDIR="."; +$NUM_JOBS=0; + +while(my $inFile = <LIST_FILE>) +{ + $NUM_JOBS++; + chomp($inFile); + + if($inFile =~ m/^($pwd)(.*)/) + { + my $inFile_tmp = $2; + while($inFile_tmp =~ m/^(\/)(.*)/) + { + $inFile_tmp = $2; + } + + if( -e $inFile_tmp ) + { + $inFile = $inFile_tmp; + } + } + + + if($inFile =~ m/^(\S{100})/) + { + if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) + { + my $inFile_tmp = $1; + + if( -e $inFile_tmp ) + { + $NEW_WDIR=$inFile_tmp; + $inFile = $2; + } + } + } + # find number of leading zero + + if($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{4})(\d{1})$/) + { + push(@jobs10, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{3})(\d{2})$/) + { + push(@jobs100, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{2})(\d{3})$/) + { + push(@jobs1000, $inFile); + } + elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{1})(\d{4})$/) + { + push(@jobs10000, $inFile); + } + else + { + push(@jobs100000, $inFile); + } +} +close LIST_FILE; + +if(defined @jobs10) +{ + my( $num ) = sprintf( "%04d", 0); + submit($num, @jobs10); +} + +if(defined @jobs100) +{ + my( $num ) = sprintf( "%03d", 0); + submit($num, @jobs100); +} + +if(defined @jobs1000) +{ + my( $num ) = sprintf( "%02d", 0); + submit($num, @jobs1000); +} + +if(defined @jobs10000) +{ + my( $num ) = sprintf( "%01d", 0); + submit($num, @jobs10000); +} + +if(defined @jobs100000) +{ + submit("", @jobs100000); +} + +for($count = 0; $count < @bsub_jobs; $count++) +{ + open(BSUB, "| bsub -q normal -R \"select[mem > 1] rusage[mem=1]\" -o /dev/null -e /dev/null -w \"ended($bsub_jobs[$count])\" -K -J \"$bsub_jobs[$count]\_fin\"") or die "could not open bsub pipe : $!"; + print BSUB "\"echo finished > /dev/null\" > /dev/null 2> /dev/null"; + close BSUB; # or die "--Could not submit job : $!"; +} + + +sub submit +{ + my ($num, @jobs) = @_; + + my $prefix; + my @starts; + my @prefixes; + + if($jobs[0] =~ m/^(.*)(\/blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2.$3; + push(@starts, "$4"); + } + elsif($jobs[0] =~ m/^(blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2; + push(@starts, "$3"); + } + + # escape characters + if($prefix =~ /(\+|\?|\*|\[|\])/) + { + $prefix =~ s/\+/\\+/; + $prefix =~ s/\?/\\?/; + $prefix =~ s/\*/\\*/; + $prefix =~ s/\[/\\[/; + $prefix =~ s/\]/\\]/; + } + push(@prefixes, $prefix); + + + #different entries have different prefixes + for($count =0; $count < @jobs; $count++) + { + if($jobs[$count] !~ m/^$prefix(.*)/) + { + if($jobs[$count] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) + { + $prefix = $1.$2.$3; + + push(@starts, "$4"); + + if($prefix =~ /(\+|\?|\*|\[|\])/) + { + $prefix =~ s/\+/\\+/; + $prefix =~ s/\?/\\?/; + $prefix =~ s/\*/\\*/; + $prefix =~ s/\[/\\[/; + $prefix =~ s/\]/\\]/; + } + + push(@prefixes, $prefix); + } + } + } + + for($count =0; $count < @prefixes; $count++) + { + $prefix = $prefixes[$count]; + $start = $starts[$count]; + + # build the index description that need to be run + $index="$start-"; + $end="$start"; + for($j =0; $j < @jobs; $j++) + { + if($jobs[$j] =~ m/^$prefix(.*)/) + { + if($jobs[$j] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) + { + if($j == @jobs-1) + { + $index = "$index$4"; + } + elsif($end+1 >= $4) + { + $end = "$4"; + } + else + { + $index = "$index$end,$4-" + } + } + } + } + + if($index =~ m/(\-)$/) + { + $index = "$index$end"; + } + + print "$prefix, $index, $num"; + bsub($prefix, $index, $num); + } +} + + +# start job arrays +sub bsub +{ + my ($prefix, $index, $num) = @_; + + my $name = $prefix; + + if($prefix =~ m/(\/blastp\/)(.*)/) + { + $name = "$2"; + } + + my $random = int( rand( 999+1 ) ); + + push(@bsub_jobs, "$name$random\_blastp"); + + my $QUEUE="longblastq"; + if($NUM_JOBS <= 6) + { + $QUEUE="normal"; + } + + print "flexi_blast.pl -p $database $prefix$num\n"; + + open(BSUB, "| bsub -q $QUEUE -o /dev/null -n 1 -R \"select[blast && mem > 500] rusage[mem=500]\" -J$name$random\_blastp\"[$index]%16\"") or die "could not open bsub pipe : $!"; + print BSUB "cd $NEW_WDIR\n"; + print BSUB "flexi_blast.pl -p $database $prefix$num"; + print BSUB "\${LSB_JOBINDEX} > $prefix$num"; + print BSUB "\${LSB_JOBINDEX}\.out\n"; + + print BSUB "gzip -9f $prefix$num"; + print BSUB "\${LSB_JOBINDEX}\.out\n"; + + close BSUB or die "--Could not submit job : $!"; +} + +' + (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill chmod a+x $PROG.kill -echo $1 +HOSTNAME=`hostname` +REMOTE=N + +case $HOSTNAME in + deskpro*) + REMOTE=Y ;; + *) +esac + if [ x$ONEFILE = x ] then - for i in `cat $1` - do - echo "run_one_prog $i $i.out $EXPANDED_DATABASE" - run_one_prog $i $i.out $EXPANDED_DATABASE - done - + if [ $REMOTE = "Y" ]; then + WDIR=`pwd` + export WDIR + CMD=`echo $PERL_PROG_1` + ssh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\"" + else + perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD" + fi else run_one_prog $1 $2 $EXPANDED_DATABASE fi