diff --git a/etc/run_blastn.sanger b/etc/run_blastn.sanger
new file mode 100755
index 0000000000000000000000000000000000000000..da0f6b949f63243c449027b2e7c2e297000f0c62
--- /dev/null
+++ b/etc/run_blastn.sanger
@@ -0,0 +1,134 @@
+#!/bin/sh -
+
+# Run a blastn search (through blastall) on one sequence file, or on each
+# file in a file of filenames.
+#
+# arguments: -onefile input_file output_file database
+#        or: file_of_filenames database
+#
+# With a file of filenames the results for FILE are written to FILE.out.
+# Each output file is handed to gzip once its search has been run.
+#
+# to customise this script see the function called run_one_prog below
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastn.sanger,v 1.1 2005-02-21 11:42:18 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+# ONEFILE must not be inherited from the caller's environment
+ONEFILE=
+
+if [ $# = 4 ] && [ "x$1" = x-onefile ]
+then
+    shift
+    ONEFILE=t
+    DATABASE=$3
+else
+    if [ $# = 2 ]
+    then
+        DATABASE=$2
+    else
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo or: $0 file_of_filenames database 1>&2
+        exit 1
+    fi
+fi
+export DATABASE
+
+
+# expand any ~ or environment variables
+EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
+
+
+### change this function to suit your site:
+
+# Run one search and post-process its output.
+#   $1 - input sequence file
+#   $2 - output file; handed to gzip after the search
+#   $3 - database to search
+# Whatever a local search writes to stderr is appended to the output file and
+# to ${PROG}_errors.
+run_one_prog () {
+    INPUT_FILE=$1
+    OUTPUT_FILE=$2
+    DATABASE=$3
+
+
+    ### change these lines:
+    EXEC=blastall
+
+    echo "about to start $EXEC with input from $INPUT_FILE and output to"
+    echo "$OUTPUT_FILE using database $DATABASE"
+
+
+    EXTRA_ARGS=
+
+    # forget errors left by an earlier search: the remote branch below does not
+    # rewrite this file, so stale contents would be reported again
+    rm -f "${PROG}_errors.new"
+
+    # add/change the flags to suit your site:
+
+    HOSTNAME=`hostname`
+    REMOTE=N
+
+    # hosts named deskpro* hand the search to babel instead of running it here
+    case $HOSTNAME in
+        deskpro*)
+            REMOTE=Y ;;
+        *)
+            ;;
+    esac
+
+    if [ $REMOTE = "Y" ]; then
+        WDIR=`pwd`
+        export WDIR
+        # NOTE(review): ">!" is csh redirection syntax, so this presumably relies
+        # on the login shell on babel being csh or tcsh -- confirm
+        rsh babel "cd $WDIR; lsrun -v blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastn \
+            $EXTRA_ARGS >! $OUTPUT_FILE"
+    else
+        # stdout goes to the output file; stderr goes through the pipe to tee,
+        # which keeps a copy and repeats it on stderr
+        lsrun -v blastwrap.pl $EXEC -d "$DATABASE" -i "$INPUT_FILE" -p blastn \
+            $EXTRA_ARGS 2>&1 > "$OUTPUT_FILE" |
+            tee "${PROG}_errors.new" 1>&2
+    fi
+
+    #### end of changes
+
+
+    # append any errors before starting gzip, so that the append cannot race
+    # with gzip replacing the output file
+    if [ -s "${PROG}_errors.new" ]
+    then
+        ( echo ERROR running $PROG: ; echo;
+          echo ===================================================
+          cat "${PROG}_errors.new" ) >> "$OUTPUT_FILE"
+        cat "${PROG}_errors.new" >> "${PROG}_errors"
+    fi
+
+    # Artemis can read compressed files
+    gzip -9 "$OUTPUT_FILE" &
+}
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+if [ "x$ONEFILE" = x ]
+then
+    # the list is split on whitespace, so names must not contain blanks
+    for i in `cat "$1"`
+    do
+        run_one_prog "$i" "$i.out" "$EXPANDED_DATABASE"
+    done
+
+else
+    run_one_prog "$1" "$2" "$EXPANDED_DATABASE"
+fi
+
+exit 0
diff --git a/etc/run_blastp.sanger b/etc/run_blastp.sanger
new file mode 100755
index 0000000000000000000000000000000000000000..b97b1d98cec72b2a07eafb5f1e7c3beaa10f556c
--- /dev/null
+++ b/etc/run_blastp.sanger
@@ -0,0 +1,142 @@
+#!/bin/sh -
+
+# Run a blastp search (through blastall) on one sequence file, or on each
+# file in a file of filenames.
+#
+# arguments: -onefile input_file output_file database
+#        or: file_of_filenames database
+#
+# With a file of filenames the results for FILE are written to FILE.out.
+# Each output file is handed to gzip once its search has been run.
+#
+# to customise this script see the function called run_one_prog below
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastp.sanger,v 1.1 2005-02-21 11:45:09 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+#BLASTDB=/data/blastdb
+#BLASTMAT=/data/blastdb/aa
+#export BLASTDB
+#export BLASTMAT
+
+# ONEFILE must not be inherited from the caller's environment
+ONEFILE=
+
+if [ $# = 4 ] && [ "x$1" = x-onefile ]
+then
+    shift
+    ONEFILE=t
+    DATABASE=$3
+else
+    if [ $# = 2 ]
+    then
+        DATABASE=$2
+    else
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo or: $0 file_of_filenames database 1>&2
+        exit 1
+    fi
+fi
+export DATABASE
+
+
+# expand any ~ or environment variables
+EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
+
+
+### change this function to suit your site:
+
+# Run one search and post-process its output.
+#   $1 - input sequence file
+#   $2 - output file; handed to gzip after the search
+#   $3 - database to search
+# Whatever a local search writes to stderr is appended to the output file and
+# to ${PROG}_errors.
+run_one_prog () {
+    INPUT_FILE=$1
+    OUTPUT_FILE=$2
+    DATABASE=$3
+
+
+    ### change these lines:
+    EXEC=blastall
+
+    echo "about to start $EXEC with input from $INPUT_FILE and output to"
+    echo "$OUTPUT_FILE using database $DATABASE"
+
+
+    EXTRA_ARGS=
+
+    # forget errors left by an earlier search: the remote branch below does not
+    # rewrite this file, so stale contents would be reported again
+    rm -f "${PROG}_errors.new"
+
+    HOSTNAME=`hostname`
+    REMOTE=N
+
+    # hosts named deskpro* hand the search to babel instead of running it here
+    case $HOSTNAME in
+        deskpro*)
+            REMOTE=Y ;;
+        *)
+            ;;
+    esac
+
+    if [ $REMOTE = "Y" ]; then
+        WDIR=`pwd`
+        export WDIR
+        # NOTE(review): ">!" is csh redirection syntax, so this presumably relies
+        # on the login shell on babel being csh or tcsh -- confirm
+        rsh babel "cd $WDIR; lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \
+            blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastp \
+            $EXTRA_ARGS >! $OUTPUT_FILE"
+    else
+        # add/change the flags to suit your site:
+        # stdout goes to the output file; stderr goes through the pipe to tee,
+        # which keeps a copy and repeats it on stderr
+        lsrun -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -v \
+            blastwrap.pl $EXEC -d "$DATABASE" -i "$INPUT_FILE" -p blastp \
+            $EXTRA_ARGS 2>&1 > "$OUTPUT_FILE" |
+            tee "${PROG}_errors.new" 1>&2
+    fi
+
+    #### end of changes
+
+
+    # append any errors before starting gzip, so that the append cannot race
+    # with gzip replacing the output file
+    if [ -s "${PROG}_errors.new" ]
+    then
+        ( echo ERROR running $PROG: ; echo;
+          echo ===================================================
+          cat "${PROG}_errors.new" ) >> "$OUTPUT_FILE"
+        cat "${PROG}_errors.new" >> "${PROG}_errors"
+    fi
+
+    # Artemis can read compressed files
+    gzip -9 "$OUTPUT_FILE" &
+}
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+echo "$1"
+if [ "x$ONEFILE" = x ]
+then
+    # the list is split on whitespace, so names must not contain blanks
+    for i in `cat "$1"`
+    do
+        echo "run_one_prog $i $i.out $EXPANDED_DATABASE"
+        run_one_prog "$i" "$i.out" "$EXPANDED_DATABASE"
+    done
+
+else
+    run_one_prog "$1" "$2" "$EXPANDED_DATABASE"
+fi
+
+exit 0
diff --git a/etc/run_blastx.sanger b/etc/run_blastx.sanger
new file mode 100755
index 0000000000000000000000000000000000000000..53b525748ab9f7720fbc33277323c9d0aaf2efa8
--- /dev/null
+++ b/etc/run_blastx.sanger
@@ -0,0 +1,134 @@
+#!/bin/sh -
+
+# Run a blastx search (through blastall) on one sequence file, or on each
+# file in a file of filenames.
+#
+# arguments: -onefile input_file output_file database
+#        or: file_of_filenames database
+#
+# With a file of filenames the results for FILE are written to FILE.out.
+# Each output file is handed to gzip once its search has been run.
+#
+# to customise this script see the function called run_one_prog below
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_blastx.sanger,v 1.1 2005-02-21 11:46:35 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+# ONEFILE must not be inherited from the caller's environment
+ONEFILE=
+
+if [ $# = 4 ] && [ "x$1" = x-onefile ]
+then
+    shift
+    ONEFILE=t
+    DATABASE=$3
+else
+    if [ $# = 2 ]
+    then
+        DATABASE=$2
+    else
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo or: $0 file_of_filenames database 1>&2
+        exit 1
+    fi
+fi
+export DATABASE
+
+
+# expand any ~ or environment variables
+EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
+
+
+### change this function to suit your site:
+
+# Run one search and post-process its output.
+#   $1 - input sequence file
+#   $2 - output file; handed to gzip after the search
+#   $3 - database to search
+# Whatever a local search writes to stderr is appended to the output file and
+# to ${PROG}_errors.
+run_one_prog () {
+    INPUT_FILE=$1
+    OUTPUT_FILE=$2
+    DATABASE=$3
+
+
+    ### change these lines:
+    EXEC=blastall
+
+    echo "about to start $EXEC with input from $INPUT_FILE and output to"
+    echo "$OUTPUT_FILE using database $DATABASE"
+
+
+    EXTRA_ARGS=
+
+    # forget errors left by an earlier search: the remote branch below does not
+    # rewrite this file, so stale contents would be reported again
+    rm -f "${PROG}_errors.new"
+
+    # add/change the flags to suit your site:
+
+    HOSTNAME=`hostname`
+    REMOTE=N
+
+    # hosts named deskpro* hand the search to babel instead of running it here
+    case $HOSTNAME in
+        deskpro*)
+            REMOTE=Y ;;
+        *)
+            ;;
+    esac
+
+    if [ $REMOTE = "Y" ]; then
+        WDIR=`pwd`
+        export WDIR
+        # NOTE(review): ">!" is csh redirection syntax, so this presumably relies
+        # on the login shell on babel being csh or tcsh -- confirm
+        rsh babel "cd $WDIR; lsrun -v blastwrap.pl $EXEC -d $DATABASE -i $INPUT_FILE -p blastx \
+            $EXTRA_ARGS >! $OUTPUT_FILE"
+    else
+        # stdout goes to the output file; stderr goes through the pipe to tee,
+        # which keeps a copy and repeats it on stderr
+        lsrun -v blastwrap.pl $EXEC -d "$DATABASE" -i "$INPUT_FILE" -p blastx \
+            $EXTRA_ARGS 2>&1 > "$OUTPUT_FILE" |
+            tee "${PROG}_errors.new" 1>&2
+    fi
+
+    #### end of changes
+
+
+    # append any errors before starting gzip, so that the append cannot race
+    # with gzip replacing the output file
+    if [ -s "${PROG}_errors.new" ]
+    then
+        ( echo ERROR running $PROG: ; echo;
+          echo ===================================================
+          cat "${PROG}_errors.new" ) >> "$OUTPUT_FILE"
+        cat "${PROG}_errors.new" >> "${PROG}_errors"
+    fi
+
+    # Artemis can read compressed files
+    gzip -9 "$OUTPUT_FILE" &
+}
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+if [ "x$ONEFILE" = x ]
+then
+    # the list is split on whitespace, so names must not contain blanks
+    for i in `cat "$1"`
+    do
+        run_one_prog "$i" "$i.out" "$EXPANDED_DATABASE"
+    done
+
+else
+    run_one_prog "$1" "$2" "$EXPANDED_DATABASE"
+fi
+
+exit 0
diff --git a/etc/run_clustalx.sanger b/etc/run_clustalx.sanger
new file mode 100755
index 0000000000000000000000000000000000000000..f2714f434334c9e7bc49fec5c6392ac9f0435c5b
--- /dev/null
+++ b/etc/run_clustalx.sanger
@@ -0,0 +1,69 @@
+#!/bin/sh -
+
+# this script will run clustalx on a temporary file containing the concatenated
+# contents of files listed in a file of filenames
+#
+# arguments: file_of_filenames
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_clustalx.sanger,v 1.1 2005-02-21 11:49:41 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+
+if [ $# != 1 ]
+then
+    echo usage: $0 file_of_filenames 1>&2
+    exit 1
+fi
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+file_of_filenames=$1
+date=`date +"%y_%m_%d"`
+hostname=`hostname`
+temp_file_name=clustalx/artemis_temp.$$.$hostname.$date.clustalx_input.fasta
+
+# the temporary files are written into ./clustalx, which may not exist yet
+mkdir -p clustalx || exit 1
+
+xargs cat < "$file_of_filenames" > "$temp_file_name"
+
+# make sure that the identifiers are unique for clustalx: the second and later
+# occurrences of a name get ".0", ".1", ... appended
+perl -pne 'if (/^>(\S+)/) {
+  $name = $1;
+  if (exists $h{$name}) {
+    $i = $h{$name}++;
+    s/^>(\S+)/>$name.$i/;
+  } else {
+    $h{$name} = 0;
+  }
+}' "$temp_file_name" > "$temp_file_name.processed"
+
+# delete it at some point
+echo "rm -f $temp_file_name* > /dev/null 2>&1" | at now + 8 hours
+
+# hosts named deskpro* start clustalx on babel instead of locally
+REMOTE=N
+
+case $hostname in
+    deskpro*)
+        REMOTE=Y ;;
+    *)
+        ;;
+esac
+
+if [ $REMOTE = "Y" ]; then
+    WDIR=`pwd`
+    export WDIR
+    ssh babel "cd $WDIR; clustalx $temp_file_name.processed"
+else
+    clustalx "$temp_file_name.processed"
+fi
+
+exit 0
diff --git a/etc/run_fasta.sanger b/etc/run_fasta.sanger
new file mode 100755
index 0000000000000000000000000000000000000000..091365b724333fd86a8fb85fd69bd3238cfb347f
--- /dev/null
+++ b/etc/run_fasta.sanger
@@ -0,0 +1,227 @@
+#!/bin/sh -
+
+# Run a fasta search on one sequence file, or on each file in a file of
+# filenames.
+#
+# arguments: -onefile input_file output_file database
+#        or: file_of_filenames database
+#
+# A single file is searched by run_one_prog below.  A file of filenames is
+# handed to the Perl program held in PERL_PROG, which pipes one fasta command
+# and one gzip command per input file into bsub.
+#
+# to customise this script see the function called run_one_prog below
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fasta.sanger,v 1.1 2005-02-21 11:51:10 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+# ONEFILE must not be inherited from the caller's environment
+ONEFILE=
+
+if [ $# = 4 ] && [ "x$1" = x-onefile ]
+then
+    shift
+    ONEFILE=t
+    DATABASE=$3
+else
+    if [ $# = 2 ]
+    then
+        DATABASE=$2
+    else
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo or: $0 file_of_filenames database 1>&2
+        exit 1
+    fi
+fi
+export DATABASE
+
+
+#
+# Exchange DB name for fasta reference
+#
+case $DATABASE in
+    %uniprot)           DATABASE="%U" ;;
+    %uniprot_archaea)   DATABASE="%A" ;;
+    %uniprot_bacteria)  DATABASE="%B" ;;
+    %uniprot_eukaryota) DATABASE="%E" ;;
+    %uniprot_viruses)   DATABASE="%V" ;;
+    %uniprot_rest)      DATABASE="%R" ;;
+    %malaria)           DATABASE="%M" ;;
+    %kineto_aa)         DATABASE="%K" ;;
+esac
+
+# expand any ~ or environment variables
+EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
+
+### change this function to suit your site:
+
+# Run one search through bsub and post-process its output.
+#   $1 - input sequence file
+#   $2 - output file; handed to gzip after the search
+#   $3 - database to search
+# Whatever the search writes to stderr is appended to the output file and to
+# ${PROG}_errors.
+run_one_prog () {
+    INPUT_FILE=$1
+    OUTPUT_FILE=$2
+    DATABASE=$3
+
+    ### strip out directory from command line
+
+    IN=`echo "$INPUT_FILE" | sed -n -e "s|$PWD//||p"`
+
+    if test "$IN" != "" && test -f "$IN"; then
+        INPUT_FILE="$IN"
+    fi
+
+    # printf gives the same layout under every /bin/sh (echo's "\n" handling varies)
+    printf '\n\nIN=%s\nPWD=%s\nINPUT_FILE=%s\n\n\n' "$IN" "$PWD" "$INPUT_FILE"
+    ### change these lines:
+
+    ### get sequence size
+    seq_size=`infoseq "$INPUT_FILE" -length -only -auto | awk '{ sum += $1 } END { print sum }'`
+
+    FASTLIBS=/nfs/disk222/yeastpub/bio-soft/fasta/pubseqgbs
+    export FASTLIBS
+    EXEC=/nfs/disk222/yeastpub/bio-soft/fasta/fasta33_t
+
+    echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2
+    echo "$OUTPUT_FILE using database $DATABASE" 1>&2
+
+    # add/change the flags to suit your site:
+    COMMAND="$EXEC -B -S -q -b 100 -H $INPUT_FILE $DATABASE ktup 2"
+
+    echo "command line: $COMMAND" 1>&2
+
+    # sequences shorter than 50000 use the normal queue, everything else longblastq;
+    # a size that is not a whole number (e.g. infoseq failed) also uses longblastq
+    case $seq_size in
+        ''|*[!0-9]*)
+            QUEUE=longblastq ;;
+        *)
+            if [ "$seq_size" -lt 50000 ]
+            then
+                QUEUE=normal
+            else
+                QUEUE=longblastq
+            fi ;;
+    esac
+
+    # $COMMAND is unquoted on purpose: it has to split into separate words.
+    # stdout goes to the output file; stderr goes through the pipe to tee
+    bsub -q "$QUEUE" -n 1 -R 'select[blast && mem > 500] rusage[r1m=1:mem=500]' -I $COMMAND 2>&1 > "$OUTPUT_FILE" |
+        tee "${PROG}_errors.new" 1>&2
+
+    #### end of changes
+
+
+    # append any errors before starting gzip, so that the append cannot race
+    # with gzip replacing the output file
+    if [ -s "${PROG}_errors.new" ]
+    then
+        ( echo ERROR running $PROG: ; echo;
+          echo ===================================================
+          cat "${PROG}_errors.new" ) >> "$OUTPUT_FILE"
+        cat "${PROG}_errors.new" >> "${PROG}_errors"
+    fi
+
+    # Artemis can read compressed files
+    gzip -9 "$OUTPUT_FILE" &
+}
+
+# Perl program for file-of-filenames mode; arguments: list file, database, $PWD.
+# Do not put "#" comments inside it: the remote branch joins its lines into one.
+# NOTE(review): FASTLIBS here is pubseqgbs_test but run_one_prog uses pubseqgbs -- confirm
+PERL_PROG='
+
+local *BSUB;
+
+my $file = $ARGV[0];
+my $database = $ARGV[1];
+my $pwd = $ARGV[2];
+chomp $file;
+chomp $database;
+
+$ENV{'FASTLIBS'} = "/nfs/disk222/yeastpub/bio-soft/fasta/pubseqgbs_test";
+
+
+open(BSUB, "| bsub -q normal -o fasta_errors -n 1 -R \"select[blast && mem > 500] rusage[r1m=1:mem=500]\" -K") or die "could not open bsub pipe : $!";
+open(LIST_FILE,$file);
+
+$EXEC="/nfs/disk222/yeastpub/bio-soft/fasta/fasta33_t";
+
+while(my $inFile = <LIST_FILE>)
+{
+  chomp($inFile);
+
+  if($inFile =~ m/^($pwd)(.*)/)
+  {
+    my $inFile_tmp = $2;
+    while($inFile_tmp =~ m/^(\/)(.*)/)
+    {
+      $inFile_tmp = $2;
+    }
+
+    if( -e $inFile_tmp )
+    {
+      $inFile = $inFile_tmp;
+    }
+  }
+
+  if($inFile =~ m/^(\S{100})/)
+  {
+    if($inFile =~ m/^(\S{90,})(fasta\/\S+)/)
+    {
+      my $inFile_tmp = $1;
+
+      if( -e $inFile_tmp )
+      {
+        print BSUB "cd $inFile_tmp\n";
+        $inFile = $2;
+      }
+    }
+  }
+
+  print BSUB "$EXEC -B -S -q -b 100 -H $inFile $database ktup 2 > $inFile\.out\n";
+  print BSUB "gzip -9 $inFile\.out\n";
+}
+close BSUB or die "--Could not submit job : $!";
+close LIST_FILE;
+
+'
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+HOSTNAME=`hostname`
+REMOTE=N
+
+case $HOSTNAME in
+    deskpro*)
+        REMOTE=Y ;;
+    *)
+        ;;
+esac
+
+if [ "x$ONEFILE" = x ]
+then
+    if [ $REMOTE = "Y" ]; then
+        WDIR=`pwd`
+        export WDIR
+        # join the program into one line for the remote command; printf and tr are
+        # used so that the text is neither globbed nor has its backslashes rewritten
+        CMD=`printf '%s' "$PERL_PROG" | tr '\n' ' '`
+        rsh babel "cd $WDIR; perl -w -e '$CMD' \"$1\" \"$EXPANDED_DATABASE\" \"$PWD\""
+    else
+        perl -w -e "$PERL_PROG" "$1" "$EXPANDED_DATABASE" "$PWD"
+    fi
+else
+    run_one_prog "$1" "$2" "$EXPANDED_DATABASE"
+fi
+
+exit 0
diff --git a/etc/run_fastx.sanger b/etc/run_fastx.sanger
new file mode 100644
index 0000000000000000000000000000000000000000..120492493a44ee35af5e545c2810246f34e721a0
--- /dev/null
+++ b/etc/run_fastx.sanger
@@ -0,0 +1,135 @@
+#!/bin/sh -
+
+# Run a fastx search on one sequence file, or on each file in a file of
+# filenames.
+#
+# arguments: -onefile input_file output_file database
+#        or: file_of_filenames database
+#
+# With a file of filenames the results for FILE are written to FILE.out.
+# Each output file is handed to gzip once its search has been run.
+#
+# to customise this script see the function called run_one_prog below
+
+
+RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_fastx.sanger,v 1.1 2005-02-21 11:53:49 tjc Exp $"
+
+# the program name is whatever sits between "run_" and ",v" in the RCS header
+PROG=`echo "$RCS_HEADER" | sed 's/.*run_\(.*\),v.*/\1/'`
+
+# ONEFILE must not be inherited from the caller's environment
+ONEFILE=
+
+if [ $# = 4 ] && [ "x$1" = x-onefile ]
+then
+    shift
+    ONEFILE=t
+    DATABASE=$3
+else
+    if [ $# = 2 ]
+    then
+        DATABASE=$2
+    else
+        echo usage: $0 -onefile input_file output_file database 1>&2
+        echo or: $0 file_of_filenames database 1>&2
+        exit 1
+    fi
+fi
+export DATABASE
+
+
+# expand any ~ or environment variables
+EXPANDED_DATABASE=`echo "echo $DATABASE" | /bin/csh -f`
+
+
+### change this function to suit your site:
+
+# Run one search and post-process its output.
+#   $1 - input sequence file
+#   $2 - output file; handed to gzip after the search
+#   $3 - database to search
+# Whatever a local search writes to stderr is appended to the output file and
+# to ${PROG}_errors.
+run_one_prog () {
+    INPUT_FILE=$1
+    OUTPUT_FILE=$2
+    DATABASE=$3
+
+
+    ### change these lines:
+
+    FASTLIBS=/nfs/disk222/yeastpub/bio-soft/fasta/pubseqgbs
+    export FASTLIBS
+    EXEC=/nfs/disk222/yeastpub/bio-soft/fasta/fastx33_t
+
+    echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2
+    echo "$OUTPUT_FILE using database $DATABASE" 1>&2
+
+    # forget errors left by an earlier search: the remote branch below does not
+    # rewrite this file, so stale contents would be reported again
+    rm -f "${PROG}_errors.new"
+
+    # add/change the flags to suit your site:
+    HOSTNAME=`hostname`
+    REMOTE=N
+
+    # hosts named deskpro* hand the search to babel instead of running it here
+    case $HOSTNAME in
+        deskpro*)
+            REMOTE=Y ;;
+        *)
+            ;;
+    esac
+
+    COMMAND="$EXEC -B -S -q -b 40 -H $INPUT_FILE $DATABASE ktup 2"
+    echo "command line: $COMMAND" 1>&2
+
+    if [ $REMOTE = "Y" ]; then
+        WDIR=`pwd`
+        export WDIR
+        # NOTE(review): ">!" is csh redirection syntax, so this presumably relies
+        # on the login shell on babel being csh or tcsh -- confirm
+        # NOTE(review): FASTLIBS is only exported locally; check babel sets its own
+        rsh babel "cd $WDIR; $COMMAND >! $OUTPUT_FILE"
+    else
+        # $COMMAND is unquoted on purpose: it has to split into separate words.
+        # stdout goes to the output file; stderr goes through the pipe to tee
+        $COMMAND 2>&1 > "$OUTPUT_FILE" |
+            tee "${PROG}_errors.new" 1>&2
+    fi
+
+    #### end of changes
+
+
+    # append any errors before starting gzip, so that the append cannot race
+    # with gzip replacing the output file
+    if [ -s "${PROG}_errors.new" ]
+    then
+        ( echo ERROR running $PROG: ; echo;
+          echo ===================================================
+          cat "${PROG}_errors.new" ) >> "$OUTPUT_FILE"
+        cat "${PROG}_errors.new" >> "${PROG}_errors"
+    fi
+
+    # Artemis can read compressed files
+    gzip -9 "$OUTPUT_FILE" &
+}
+
+# leave behind a small script that kills this one
+(echo "#!/bin/sh -"; echo "kill $$") > "$PROG.kill"
+
+chmod a+x "$PROG.kill"
+
+if [ "x$ONEFILE" = x ]
+then
+    # the list is split on whitespace, so names must not contain blanks
+    for i in `cat "$1"`
+    do
+        run_one_prog "$i" "$i.out" "$EXPANDED_DATABASE"
+    done
+
+else
+    run_one_prog "$1" "$2" "$EXPANDED_DATABASE"
+fi
+
+exit 0