diff --git a/etc/run_blastp.sanger b/etc/run_blastp.sanger
index cfb582455ba093ecd708f9bb293ba6c6b978ed5d..18bedcc9ad04775ac3e2ae58f00752ba491ab428 100755
--- a/etc/run_blastp.sanger
+++ b/etc/run_blastp.sanger
@@ -6,14 +6,10 @@
# to customise this script see the function called run_one_prog below
-RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp.sanger,v 1.4 2005-06-15 19:54:00 tjc Exp $"
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp.sanger,v 1.5 2007-02-27 10:56:30 tjc Exp $"
PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'`
-#BLASTDB=/data/blastdb
-#BLASTMAT=/data/blastdb/aa
-#export BLASTDB
-#export BLASTMAT
if [ $# = 4 -a x$1 = x-onefile ]
then
@@ -25,17 +21,17 @@ else
then
DATABASE=$2 export DATABASE
else
- echo usage: $0 -onefile input_file output_file database
- echo or: $0 file_of_filenames database
+ echo usage: $0 -onefile input_file output_file database 1>&2
+ echo or: $0 file_of_filenames database 1>&2
exit 1
fi
fi
+
# expand any ~ or environment variables
EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
-
### change this function to suit your site:
run_one_prog () {
@@ -73,7 +69,7 @@ run_one_prog () {
# add/change the flags to suit your site:
# lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \
# blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \
-# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE |
+# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE |
# tee ${PROG}_errors.new 1>&2
bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS
fi
@@ -86,26 +82,337 @@ run_one_prog () {
if [ -s ${PROG}_errors.new ]
then
- ( echo ERROR running $PROG: ; echo;
+ ( echo ERROR running $PROG: ; echo;
echo ===================================================
cat ${PROG}_errors.new ) >> $OUTPUT_FILE
cat ${PROG}_errors.new >> ${PROG}_errors
fi
}
+PERL_PROG_1='
+
+local *BSUB;
+
+my $file = $ARGV[0];
+my $database = $ARGV[1];
+my $pwd = $ARGV[2];
+chomp $file;
+chomp $database;
+
+open(LIST_FILE, "<", $file) or die "could not open $file : $!";
+open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!";
+
+$EXEC="blastall";
+
+while(my $inFile = <LIST_FILE>)
+{
+  chomp($inFile);
+
+  if($inFile =~ m/^(\Q$pwd\E)(.*)/)
+  {
+    my $inFile_tmp = $2;
+    while($inFile_tmp =~ m/^(\/)(.*)/)
+    {
+      $inFile_tmp = $2;
+    }
+
+    if( -e $inFile_tmp )
+    {
+      $inFile = $inFile_tmp;
+    }
+  }
+
+  if($inFile =~ m/^(\S{100})/)
+  {
+    if($inFile =~ m/^(\S{90,})(blastp\/\S+)/)
+    {
+      my $inFile_tmp = $1;
+
+      if( -e $inFile_tmp )
+      {
+        print BSUB "cd $inFile_tmp\n";
+        $inFile = $2;
+      }
+    }
+  }
+
+  print BSUB "flexi_blast.pl -p $database $inFile $database ktup 2 > $inFile\.out\n";
+  print BSUB "gzip -9 $inFile\.out\n";
+}
+close BSUB or die "--Could not submit job : $!";
+close LIST_FILE;
+'
+# PERL_PROG_1 (above) is run over ssh when REMOTE=Y.  It opens the file of
+# filenames first (dying if that fails) and then pipes one flexi_blast.pl and
+# one gzip command per entry into a single bsub -K.  It is flattened onto one
+
+PERL_PROG='
+# Local driver: buckets the listed input files and submits LSF job arrays.
+local *BSUB;
+# arguments: file of filenames, database, current working directory
+my $file = $ARGV[0];
+my $database = $ARGV[1];
+my $pwd = $ARGV[2];
+chomp $file;
+chomp $database;
+
+open(LIST_FILE, "<", $file) or die "could not open $file : $!";
+
+$NEW_WDIR=".";   # directory the job script changes into, see bsub()
+$NUM_JOBS=0;     # number of list entries, used by bsub() to pick the queue
+
+while(my $inFile = <LIST_FILE>)
+{
+  $NUM_JOBS++;
+  chomp($inFile);
+  # drop a leading working directory (matched literally) if the rest exists
+  if($inFile =~ m/^(\Q$pwd\E)(.*)/)
+  {
+    my $inFile_tmp = $2;
+    while($inFile_tmp =~ m/^(\/)(.*)/)
+    {
+      $inFile_tmp = $2;
+    }
+
+    if( -e $inFile_tmp )
+    {
+      $inFile = $inFile_tmp;
+    }
+  }
+  # a path of 100 or more non-blank characters is split: the leading part
+  # (if it exists) becomes NEW_WDIR and only the blastp/... tail is kept
+  if($inFile =~ m/^(\S{100})/)
+  {
+    if($inFile =~ m/^(\S{90,})(blastp\/\S+)/)
+    {
+      my $inFile_tmp = $1;
+
+      if( -e $inFile_tmp )
+      {
+        $NEW_WDIR=$inFile_tmp;
+        $inFile = $2;
+      }
+    }
+  }
+  # bucket by the number of zeros leading the trailing number:
+  # 0000N, 000NN, 00NNN, 0NNNN, and everything else
+  if($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{4})(\d{1})$/)
+  {
+    push(@jobs10, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{3})(\d{2})$/)
+  {
+    push(@jobs100, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{2})(\d{3})$/)
+  {
+    push(@jobs1000, $inFile);
+  }
+  elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{1})(\d{4})$/)
+  {
+    push(@jobs10000, $inFile);
+  }
+  else
+  {
+    push(@jobs100000, $inFile);
+  }
+}
+close LIST_FILE;
+
+if(@jobs10)      # non-empty test; defined(@array) is fatal from Perl 5.22 on
+{
+  my( $num ) = sprintf( "%04d", 0);   # 0000 goes between prefix and index
+  submit($num, @jobs10);
+}
+
+if(@jobs100)
+{
+  my( $num ) = sprintf( "%03d", 0);   # 000
+  submit($num, @jobs100);
+}
+
+if(@jobs1000)
+{
+  my( $num ) = sprintf( "%02d", 0);   # 00
+  submit($num, @jobs1000);
+}
+
+if(@jobs10000)
+{
+  my( $num ) = sprintf( "%01d", 0);   # 0
+  submit($num, @jobs10000);
+}
+
+if(@jobs100000)  # remaining names: no padding string
+{
+  submit("", @jobs100000);
+}
+
+foreach my $array_name (@bsub_jobs) # one follow-up job per name recorded by bsub()
+{
+  my $waiter = "| bsub -q normal -R \"select[mem > 1] rusage[mem=1]\" -o /dev/null -e /dev/null -w \"ended($array_name)\" -K -J \"$array_name\_fin\"";
+  open(BSUB, $waiter) or die "could not open bsub pipe : $!";
+  print BSUB "\"echo finished > /dev/null\" > /dev/null 2> /dev/null";
+  close BSUB; # or die "--Could not submit job : $!";
+}
+
+# submit($num, @jobs)
+#
+# Splits @jobs into groups that share a file name prefix (everything before
+# the trailing 5-digit number) and starts one job array per group via bsub().
+#
+#   $num  - padding zeros that bsub() puts between the prefix and the index
+#   @jobs - input file names from one bucket (same count of leading zeros)
+#
+# The index list handed to bsub() is a comma separated list of runs, for
+# example 00001-00003,00007-00009.  The regex-escaped prefix is also the
+# one bsub() writes into the job script.  Each group is reported on stdout
+# before it is submitted.
+sub submit
+{
+  my ($num, @jobs) = @_;
+
+  my $prefix;
+  my @starts;
+  my @prefixes;
+
+  # prefix and first number come from the first job
+  if($jobs[0] =~ m/^(.*)(\/blastp\/)([^\/]*)(\d{5})/)
+  {
+    $prefix = $1.$2.$3;
+    push(@starts, "$4");
+  }
+  # same for a path that starts with blastp/
+  elsif($jobs[0] =~ m/^(blastp\/)([^\/]*)(\d{5})/)
+  {
+    $prefix = $1.$2;
+    push(@starts, "$3");
+  }
+
+  # escape characters: put a backslash before every + ? * [ ] because the
+  # prefix is used as a regular expression below (all occurrences, hence /g)
+  $prefix =~ s/([+?*\[\]])/\\$1/g;
+  push(@prefixes, $prefix);
+
+
+  # different entries have different prefixes: each job that does not match
+  # the current prefix opens a new group
+  for(my $count =0; $count < @jobs; $count++)
+  {
+    if($jobs[$count] !~ m/^$prefix(.*)/)
+    {
+      if($jobs[$count] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/)
+      {
+        $prefix = $1.$2.$3;
+
+        push(@starts, "$4");
+
+        $prefix =~ s/([+?*\[\]])/\\$1/g;
+
+        push(@prefixes, $prefix);
+      }
+    }
+  }
+
+  for(my $count =0; $count < @prefixes; $count++)
+  {
+    $prefix = $prefixes[$count];
+    my $start = $starts[$count];
+
+    # build the index description that needs to be run
+    my $index="$start-";
+    my $end="$start";
+    for(my $j =0; $j < @jobs; $j++)
+    {
+      if($jobs[$j] =~ m/^$prefix(.*)/)
+      {
+        if($jobs[$j] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/)
+        {
+          if($end+1 < $4)
+          {
+            # gap in the numbering: close the open run, start a new one
+            $index = "$index$end,$4-";
+          }
+
+          # Always advance the end of the open run, also right after a
+          # gap and for the last job.  Otherwise the job following a gap
+          # is treated as another gap (giving runs like 00007-00003) and
+          # a last job that follows a gap is merged into the previous run.
+          $end = "$4";
+        }
+      }
+    }
+
+    # close the run that is still open
+    $index = "$index$end";
+
+    # report the group, then start its job array
+    print "$prefix, $index, $num";
+    bsub($prefix, $index, $num);
+  }
+}
+
+
+# bsub($prefix, $index, $num): start one job array through the bsub command
+sub bsub
+{
+ my ($prefix, $index, $num) = @_;
+ # job name: the text after /blastp/ in the prefix, or the whole prefix
+ my $name = $prefix;
+
+ if($prefix =~ m/(\/blastp\/)(.*)/)
+ {
+ $name = "$2";
+ }
+ # integer from 0 to 999 that is appended to the job name
+ my $random = int( rand( 999+1 ) );
+ # record the name; the main program loops over @bsub_jobs after the submits
+ push(@bsub_jobs, "$name$random\_blastp");
+ # queue: normal when NUM_JOBS is 6 or less, otherwise longblastq
+ my $QUEUE="longblastq";
+ if($NUM_JOBS <= 6)
+ {
+ $QUEUE="normal";
+ }
+
+ print "flexi_blast.pl -p $database $prefix$num\n";
+ # the job script is piped to bsub; -J carries the name plus [$index]%16
+ open(BSUB, "| bsub -q $QUEUE -o /dev/null -n 1 -R \"select[blast && mem > 500] rusage[mem=500]\" -J$name$random\_blastp\"[$index]%16\"") or die "could not open bsub pipe : $!";
+ print BSUB "cd $NEW_WDIR\n";
+ print BSUB "flexi_blast.pl -p $database $prefix$num";
+ print BSUB "\${LSB_JOBINDEX} > $prefix$num";
+ print BSUB "\${LSB_JOBINDEX}\.out\n";
+ # \$ keeps LSB_JOBINDEX out of Perl interpolation, so it reaches the job script
+ print BSUB "gzip -9f $prefix$num";
+ print BSUB "\${LSB_JOBINDEX}\.out\n";
+
+ close BSUB or die "--Could not submit job : $!";
+}
+
+'
+
(echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill
chmod a+x $PROG.kill
-echo $1
+HOSTNAME=`hostname`
+REMOTE=N
+# hosts named deskpro* are flagged REMOTE; their work is run via ssh below
+case $HOSTNAME in
+ deskpro*)
+ REMOTE=Y ;;
+ *)
+esac
+# ONEFILE empty: $1 is a file of filenames, handed to one of the Perl drivers
if [ x$ONEFILE = x ]
then
- for i in `cat $1`
- do
- echo "run_one_prog $i $i.out $EXPANDED_DATABASE"
- run_one_prog $i $i.out $EXPANDED_DATABASE
- done
-
+ if [ $REMOTE = "Y" ]; then
+ WDIR=`pwd`
+ export WDIR
+ CMD=`echo $PERL_PROG_1` # unquoted: word splitting joins the program onto one line
+ ssh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\"" # program is single-quoted for the shell on babel
+ else
+ perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD" # local run, program passed with its newlines intact
+ fi
else
run_one_prog $1 $2 $EXPANDED_DATABASE
fi