Skip to content
Snippets Groups Projects
run_hth 3.4 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    #!/bin/sh -
    
    # this script will run a search program on a sequence input file or on each
    # file in a file of filenames
    
    # to customise this script see the function called run_one_prog below
    
    
    RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_hth,v 1.1 2004-06-09 10:03:06 tjc Exp $"
    
    PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'`
    
    
    if [ $# = 4 -a x$1 = x-onefile ]
    then
        shift
        ONEFILE=t
        PARAMETERS=$3 export PARAMETERS
    else
        if [ $# = 2 ]
        then
            PARAMETERS=$2 export PARAMETERS
        else
            echo usage: $0 -onefile input_file output_file parameters 1>&2
            echo    or: $0 file_of_filenames parameters 1>&2
            exit 1
        fi
    fi
    
    
    # expand any ~ or environment variables
    EXPANDED_PARAMETERS=`echo "echo $PARAMETERS" | /bin/csh -f`
    
    
    ### change this function to suit your site:
    
    run_one_prog () {
        INPUT_FILE=$1
        OUTPUT_FILE=$2
        PARAMETERS=$3
    
    
        ### change these lines:
        EXEC=helixturnhelix
    
        echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2
        echo "$OUTPUT_FILE using database $PARAMETERS" 1>&2
        echo "command line: $EXEC $PARAMETERS $INPUT_FILE" 1>&2
    
        $EXEC $INPUT_FILE $OUTPUT_FILE $PARAMETERS 2> hth_errors.new
    
        # add/change the flags to suit your site:
        $EXEC $INPUT_FILE -outfile $OUTPUT_FILE $PARAMETERS 2>&1 | 
          tee ${PROG}_errors.new 1>&2
    
        #### end of changes
    
    
        if [ -s ${PROG}_errors.new ]
        then
            ( echo ERROR running $PROG: ; echo; 
              echo ===================================================
              cat ${PROG}_errors.new ) >> $OUTPUT_FILE
            cat ${PROG}_errors.new >> ${PROG}_errors
        fi
    }
    
    (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill
    
    chmod a+x $PROG.kill
    
    if [ x$ONEFILE = x ]
    then
        for i in `cat $1`
        do
            run_one_prog $i $i.out $EXPANDED_PARAMETERS
        done
    
    else
        run_one_prog $1 $2 $EXPANDED_PARAMETERS
    fi
    
    if [ x$ONEFILE = x ]
    then
        PEPFILES=`cat $1`
    else
        PEPFILES=$1
    fi
    
    if [ -e hth/hth.tab ]
    then
        mv -f hth/hth.tab hth/hth.tab.old
    fi
    
    for i in $PEPFILES
    do
    # write a tab file containing a feature for each hth
    perl -e '
    open PEPFILE, "$ARGV[0]" or die "$0: cannot open $ARGV[0]\n";
    
    my $first_line = <PEPFILE>;
    
    close PEPFILE;
    
    if ($first_line !~ m/^>(.*?), (.*) (\d+):(\d+)\s+(\S+)/) {
      die "first line of $ARGV[0] is badly formatted\n";
    }
    
    my ($gene_name, $description, $pep_first_base,
        $pep_last_base, $forward_or_back) = ($1, $2, $3, $4, $5);
    
    open HTHFILE, "$ARGV[1]" or die "$0: cannot open $ARGV[1]\n";
    
    my $score;
    my $first_match_base;
    
    while (<HTHFILE>) {
      if (m/^Score (\d+) .* in .* at residue (\d+)/) {
        $score = $1;
        $first_match_base = $2;
      }
    
      if (m/^\s+Sequence:\s+(\S+)/ && 
          defined ($score) && defined ($pep_first_base)) {
        if ($forward_or_back eq "forward") {
          my $start_base = $pep_first_base + ($first_match_base - 1) * 3;
          my $end_base = $start_base + (length $1) * 3 - 1;
          print <<EOF;
    FT   CDS_motif       $start_base..$end_base
    FT                   /note="Helix-turn-helix from $gene_name score $score"
    FT                   /label=hth
    EOF
        } else {
          my $end_base = $pep_last_base - ($first_match_base - 1) * 3;
          my $start_base = $end_base - ((length $1) * 3 - 1);
          print <<EOF;
    FT   CDS_motif       complement($start_base..$end_base)
    FT                   /note="Helix-turn-helix from $gene_name score $score"
    FT                   /label=hth
    EOF
        }
      }
    }
    ' $i $i.out >> hth/hth.tab
    
    done
    
    exit 0