From 7d3331fca595e9e101e84578267047a0074828a1 Mon Sep 17 00:00:00 2001 From: tjc <tjc@ee4ac58c-ac51-4696-9907-e4b3aa274f04> Date: Tue, 27 Feb 2007 10:59:41 +0000 Subject: [PATCH] generic version git-svn-id: svn+ssh://svn.internal.sanger.ac.uk/repos/svn/pathsoft/artemis/trunk@5525 ee4ac58c-ac51-4696-9907-e4b3aa274f04 --- etc/run_blastp | 373 ++++--------------------------------------------- 1 file changed, 26 insertions(+), 347 deletions(-) diff --git a/etc/run_blastp b/etc/run_blastp index 2ee1b2a1b..6edeae90d 100755 --- a/etc/run_blastp +++ b/etc/run_blastp @@ -6,7 +6,7 @@ # to customise this script see the function called run_one_prog below -RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.5 2006-06-21 10:11:36 tjc Exp $" +RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp,v 1.6 2007-02-27 10:59:41 tjc Exp $" PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'` @@ -21,17 +21,17 @@ else then DATABASE=$2 export DATABASE else - echo usage: $0 -onefile input_file output_file database 1>&2 - echo or: $0 file_of_filenames database 1>&2 + echo usage: $0 -onefile input_file output_file database + echo or: $0 file_of_filenames database exit 1 fi fi - # expand any ~ or environment variables EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f` + ### change this function to suit your site: run_one_prog () { @@ -41,7 +41,19 @@ run_one_prog () { ### change these lines: - EXEC=blastall + EXEC=${EXEC-`which blastall 2>/dev/null`} + + if [ ! -x "$EXEC" ]; then + EXEC=`find Artemis* -name blastall 2>/dev/null` + fi + + if [ ! -d "$BLASTDB" ]; then + DATABASE_TMP=`echo $DATABASE | sed 's|\%||'` + DATABASE_TMP="$PWD/"`find Artemis* -name blast-data 2>/dev/null`"/$DATABASE_TMP" + if [ -f "$DATABASE_TMP" ]; then + DATABASE="$DATABASE_TMP" + fi + fi echo "about to start $EXEC with input from $INPUT_FILE and output to" echo "$OUTPUT_FILE using database $DATABASE" @@ -49,30 +61,10 @@ run_one_prog () { EXTRA_ARGS= - HOSTNAME=`hostname` - REMOTE=N - - case $HOSTNAME in - deskpro*) - REMOTE=Y ;; - *) - esac - - if [ $REMOTE = "Y" ]; then - WDIR=`pwd` - export WDIR -# rsh babel "cd $WDIR; lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \ -# blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ -# $EXTRA_ARGS >! $OUTPUT_FILE" - ssh babel "cd $WDIR; bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS" - else # add/change the flags to suit your site: -# lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \ -# blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ -# $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | -# tee ${PROG}_errors.new 1>&2 - bsub -q longblastq -o $OUTPUT_FILE -e ${PROG}_errors.new -I flexi_blast.pl -p $DATABASE $INPUT_FILE $EXTRA_ARGS - fi + nice -19 $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \ + $EXTRA_ARGS 2>&1 > $OUTPUT_FILE | + tee ${PROG}_errors.new 1>&2 #### end of changes @@ -82,337 +74,24 @@ run_one_prog () { if [ -s ${PROG}_errors.new ] then - ( echo ERROR running $PROG: ; echo; + ( echo ERROR running $PROG: ; echo; echo =================================================== cat ${PROG}_errors.new ) >> $OUTPUT_FILE cat ${PROG}_errors.new >> ${PROG}_errors fi } -PERL_PROG_1=' - -local *BSUB; - -my $file = $ARGV[0]; -my $database = $ARGV[1]; -my $pwd = $ARGV[2]; -chomp $file; -chomp $database; - -open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!"; -open(LIST_FILE,$file); - -$EXEC="blastall"; - -while(my $inFile = <LIST_FILE>) -{ - chomp($inFile); - - if($inFile =~ m/^($pwd)(.*)/) - { - my $inFile_tmp = $2; - while($inFile_tmp =~ m/^(\/)(.*)/) - { - $inFile_tmp = $2; - } - - if( -e $inFile_tmp ) - { - $inFile = $inFile_tmp; - } - } - - if($inFile =~ m/^(\S{100})/) - { - if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) - { - my $inFile_tmp = $1; - - if( -e $inFile_tmp ) - { - print BSUB "cd $inFile_tmp\n"; - $inFile = $2; - } - } - } - - print BSUB "flexi_blast.pl -p $database $inFile $database ktup 2 > $inFile\.out\n"; - print BSUB "gzip -9 $inFile\.out\n"; -} -close BSUB or die "--Could not submit job : $!"; -close LIST_FILE; -' - - - -PERL_PROG=' - -local *BSUB; - -my $file = $ARGV[0]; -my $database = $ARGV[1]; -my $pwd = $ARGV[2]; -chomp $file; -chomp $database; - -open(LIST_FILE,$file); - -$NEW_WDIR="."; -$NUM_JOBS=0; - -while(my $inFile = <LIST_FILE>) -{ - $NUM_JOBS++; - chomp($inFile); - - if($inFile =~ m/^($pwd)(.*)/) - { - my $inFile_tmp = $2; - while($inFile_tmp =~ m/^(\/)(.*)/) - { - $inFile_tmp = $2; - } - - if( -e $inFile_tmp ) - { - $inFile = $inFile_tmp; - } - } - - - if($inFile =~ m/^(\S{100})/) - { - if($inFile =~ m/^(\S{90,})(blastp\/\S+)/) - { - my $inFile_tmp = $1; - - if( -e $inFile_tmp ) - { - $NEW_WDIR=$inFile_tmp; - $inFile = $2; - } - } - } - # find number of leading zero - - if($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{4})(\d{1})$/) - { - push(@jobs10, $inFile); - } - elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{3})(\d{2})$/) - { - push(@jobs100, $inFile); - } - elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{2})(\d{3})$/) - { - push(@jobs1000, $inFile); - } - elsif($inFile =~ m/^(.*)(blastp\/)([^\/]*)(0{1})(\d{4})$/) - { - push(@jobs10000, $inFile); - } - else - { - push(@jobs100000, $inFile); - } -} -close LIST_FILE; - -if(defined @jobs10) -{ - my( $num ) = sprintf( "%04d", 0); - submit($num, @jobs10); -} - -if(defined @jobs100) -{ - my( $num ) = sprintf( "%03d", 0); - submit($num, @jobs100); -} - -if(defined @jobs1000) -{ - my( $num ) = sprintf( "%02d", 0); - submit($num, @jobs1000); -} - -if(defined @jobs10000) -{ - my( $num ) = sprintf( "%01d", 0); - submit($num, @jobs10000); -} - -if(defined @jobs100000) -{ - submit("", @jobs100000); -} - -for($count = 0; $count < @bsub_jobs; $count++) -{ - open(BSUB, "| bsub -q normal -R \"select[mem > 1] rusage[mem=1]\" -o /dev/null -e /dev/null -w \"ended($bsub_jobs[$count])\" -K -J \"$bsub_jobs[$count]\_fin\"") or die "could not open bsub pipe : $!"; - print BSUB "\"echo finished > /dev/null\" > /dev/null 2> /dev/null"; - close BSUB; # or die "--Could not submit job : $!"; -} - - -sub submit -{ - my ($num, @jobs) = @_; - - my $prefix; - my @starts; - my @prefixes; - - if($jobs[0] =~ m/^(.*)(\/blastp\/)([^\/]*)(\d{5})/) - { - $prefix = $1.$2.$3; - push(@starts, "$4"); - } - elsif($jobs[0] =~ m/^(blastp\/)([^\/]*)(\d{5})/) - { - $prefix = $1.$2; - push(@starts, "$3"); - } - - # escape characters - if($prefix =~ /(\+|\?|\*|\[|\])/) - { - $prefix =~ s/\+/\\+/; - $prefix =~ s/\?/\\?/; - $prefix =~ s/\*/\\*/; - $prefix =~ s/\[/\\[/; - $prefix =~ s/\]/\\]/; - } - push(@prefixes, $prefix); - - - #different entries have different prefixes - for($count =0; $count < @jobs; $count++) - { - if($jobs[$count] !~ m/^$prefix(.*)/) - { - if($jobs[$count] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) - { - $prefix = $1.$2.$3; - - push(@starts, "$4"); - - if($prefix =~ /(\+|\?|\*|\[|\])/) - { - $prefix =~ s/\+/\\+/; - $prefix =~ s/\?/\\?/; - $prefix =~ s/\*/\\*/; - $prefix =~ s/\[/\\[/; - $prefix =~ s/\]/\\]/; - } - - push(@prefixes, $prefix); - } - } - } - - for($count =0; $count < @prefixes; $count++) - { - $prefix = $prefixes[$count]; - $start = $starts[$count]; - - # build the index description that need to be run - $index="$start-"; - $end="$start"; - for($j =0; $j < @jobs; $j++) - { - if($jobs[$j] =~ m/^$prefix(.*)/) - { - if($jobs[$j] =~ m/^(.*)(blastp\/)([^\/]*)(\d{5})/) - { - if($j == @jobs-1) - { - $index = "$index$4"; - } - elsif($end+1 >= $4) - { - $end = "$4"; - } - else - { - $index = "$index$end,$4-" - } - } - } - } - - if($index =~ m/(\-)$/) - { - $index = "$index$end"; - } - - print "$prefix, $index, $num"; - bsub($prefix, $index, $num); - } -} - - -# start job arrays -sub bsub -{ - my ($prefix, $index, $num) = @_; - - my $name = $prefix; - - if($prefix =~ m/(\/blastp\/)(.*)/) - { - $name = "$2"; - } - - my $random = int( rand( 999+1 ) ); - - push(@bsub_jobs, "$name$random\_blastp"); - - my $QUEUE="longblastq"; - if($NUM_JOBS <= 6) - { - $QUEUE="normal"; - } - - print "flexi_blast.pl -p $database $prefix$num\n"; - - open(BSUB, "| bsub -q $QUEUE -o /dev/null -n 1 -R \"select[blast && mem > 500] rusage[mem=500]\" -J$name$random\_blastp\"[$index]%16\"") or die "could not open bsub pipe : $!"; - print BSUB "cd $NEW_WDIR\n"; - print BSUB "flexi_blast.pl -p $database $prefix$num"; - print BSUB "\${LSB_JOBINDEX} > $prefix$num"; - print BSUB "\${LSB_JOBINDEX}\.out\n"; - - print BSUB "gzip -9f $prefix$num"; - print BSUB "\${LSB_JOBINDEX}\.out\n"; - - close BSUB or die "--Could not submit job : $!"; -} - -' - (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill chmod a+x $PROG.kill -HOSTNAME=`hostname` -REMOTE=N - -case $HOSTNAME in - deskpro*) - REMOTE=Y ;; - *) -esac - if [ x$ONEFILE = x ] then - if [ $REMOTE = "Y" ]; then - WDIR=`pwd` - export WDIR - CMD=`echo $PERL_PROG_1` - ssh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\"" - else - perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD" - fi + for i in `cat $1` + do + run_one_prog $i $i.out $EXPANDED_DATABASE + done + else run_one_prog $1 $2 $EXPANDED_DATABASE fi -- GitLab