Skip to content
Snippets Groups Projects
run_jalview 4.63 KiB
Newer Older
  • Learn to ignore specific revisions
  • tjc's avatar
    tjc committed
    #!/bin/sh -
    
    # This script needs one argument - a file of file names.
    
    
    tjc's avatar
    tjc committed
    PATH=/nfs/disk100/pubseq/emboss/bin/:$PATH
    export PATH
    
    
    tjc's avatar
    tjc committed
    PERL_PROG='
    
    use strict;
    
    if (@ARGV < 1)
    { 
      die "$0 needs one argument - a file of sequence file names\n";
    }
    
    my $file = $ARGV[0];
    
    chomp $file;
    
    if (-e $file)
    {
      my $msf_file = "$file.fasta_msf";   
    
      my $seqnum = 1;
    
    tjc's avatar
    tjc committed
      printf "$msf_file\n";
    
    
    tjc's avatar
    tjc committed
      open(LIST_FILE,$file);
      while(my $line = <LIST_FILE>)
      {
    
    tjc's avatar
    tjc committed
        printf "$line\n";
    
    tjc's avatar
    tjc committed
        chomp($line);
    
        #
        # run descseq to ensure unique names
        #
        my $emboss_command_line = "descseq -append -name _$seqnum $line $line.new -auto";
    
        open EMBOSS, "|$emboss_command_line" or
                  die "cannot open pipe to descseq: $!\n";
        close EMBOSS;
    
        unlink("$line");
        rename("$line.new","$line");
        $seqnum++;
      }
    
      my $emboss_command_line = "emma \"\@$file\" -filter -stdout -osf msf  -dendoutfile /dev/null -outseq $msf_file";
    
      open EMBOSS, "|$emboss_command_line" or
                die "cannot open pipe to emma: $!\n";
      close EMBOSS;
      system "jalview", "$msf_file";
    }
    
    tjc's avatar
    tjc committed
    else
    {
      die "$file does not exist";
    }
    
    tjc's avatar
    tjc committed
    '
    
    
    tjc's avatar
    tjc committed
    PERL_PROG_FASTA='
    
    use strict;
    
    if (@ARGV != 1) 
    {
      die "$0 needs one argument - a file of feature file names\n";
    }
    
    
    sub process_fasta
    {
      my $feature_file = shift;
      my $fasta_results_file = shift;
    
      my $sequence_file;
    
      if ($fasta_results_file =~ /(.*).out/) {
        $sequence_file = $1;
      } else {
        die "cannot understand $fasta_results_file\n";
      }
    
    
    tjc's avatar
    tjc committed
      
    
    tjc's avatar
    tjc committed
      if (! -e $sequence_file) {
    
    tjc's avatar
    tjc committed
        if ($feature_file =~ /(.*)(\/jalview\/)(.+)$/)
        {
          $sequence_file = "$1/$sequence_file";
          if (! -e $fasta_results_file &&
              ! -e "$fasta_results_file.gz")
          {
            $fasta_results_file = "$1/$fasta_results_file";
          }
        }
        if (! -e $sequence_file)
        {
          die "cannot find $sequence_file\n";
        }
    
    tjc's avatar
    tjc committed
      }
    
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
      my $fasta_seq_for_alignment = "";
    
      open SEQ_FILE, $sequence_file or die "cannot open $sequence_file: $!\n";
    
      while (<SEQ_FILE>) {
        $fasta_seq_for_alignment .= $_;
      }
    
      close SEQ_FILE;
    
      if (-e $fasta_results_file) {
        open FASTA_OUTPUT, $fasta_results_file
          or die "cannot open $fasta_results_file: $!\n";
      } else {
        my $gzip_fasta_results_file = $fasta_results_file . ".gz";
        if (-e $gzip_fasta_results_file) {
          open FASTA_OUTPUT, "gzip -d < $gzip_fasta_results_file |"
            or die "cannot open $gzip_fasta_results_file: $!\n";
        } else {
          die "cannot find $fasta_results_file or $gzip_fasta_results_file\n";
        }
      }
    
    
      my $top_re = "^The best scores are:";
    
      my $seen_top = 0;
      my $seen_bottom = 0;
    
      my @protein_ids = ();
    
      while (<FASTA_OUTPUT>) {
        if (/$top_re/) {
          $seen_top = 1;
          next;
        }
    
        if ($seen_top && /^\s*$/) {
          $seen_bottom = 1;
          next;
        }
    
        if ($seen_top && !$seen_bottom) {
          if (/^(\S+)/) {
            if (@protein_ids < 20) {
              push @protein_ids, "$1"
            } else {
              last;
            }
          } else {
            warn "cannot understand this line:\n$_\n";
          }
        }
      }
    
      my %hash = ();
    
      @hash{@protein_ids} = (1) x @protein_ids;
    
      @protein_ids = sort keys %hash;
    
    
    tjc's avatar
    tjc committed
      my $protein_db = "uniprot";
    
    tjc's avatar
    tjc committed
    
      # look for each of the IDs from the FASTA output in each of the DBs
    
      for my $id (@protein_ids) {
    
    tjc's avatar
    tjc committed
        my $fetch = "getz -sf fasta -f seq [uniprot-acc:$id]";
    
    tjc's avatar
    tjc committed
    
        my $temp_seq = "";
    
        open FETCH, "$fetch |" or
          die "cannot open pipe to $fetch: $!\n";
    
        while (<FETCH>) {
          $temp_seq .= $_;
        }
    
        close FETCH;
    
        if ($? == 0) {
          $fasta_seq_for_alignment .= $temp_seq;
        } else {
          print STDERR "$id was not found in $protein_db\n";
        }
      }
    
      my $msf_file = "$feature_file.fasta_msf";
    
      my $emboss_prog = "emma";
    
      my $emboss_command_line = "$emboss_prog -filter -stdout -osf msf  -dendoutfile /dev/null > $msf_file";
    
      open EMBOSS, "|$emboss_command_line" or
         die "cannot open pipe to $emboss_prog: $!\n";
    
      print EMBOSS $fasta_seq_for_alignment;
    
      close EMBOSS;
    
      my $jalview_prog = "jalview";
    
      print STDERR "\nstarting $jalview_prog:\n";
    
      system "$jalview_prog", "$msf_file";
    }
    
    
    my $file;
    
    while (defined ($file = <>)) {
      chomp $file;
    
      if (-e $file) {
        open IN_FILE, "$file\n" or die "cannot open $file\n";
    
        my $line;
    
        while (defined ($line = <IN_FILE>)) {
          if ($line  =~ m!/fasta_file="(.*)"!) {
            my $fasta_results_file = $1;
    
            process_fasta $file, $fasta_results_file;
    
            last;
          }
        }
    
        close IN_FILE;
      }
    }'
    
    
    tjc's avatar
    tjc committed
    echo "Arguments: $@"
    test=`wc -l "$@" | awk '{print $1}'`
    
    tjc's avatar
    tjc committed
    
    
    tjc's avatar
    tjc committed
    if [ $test != 1 ]
    then
      perl -w -e "$PERL_PROG" "$@"
    else
      perl -w -e "$PERL_PROG_FASTA" "$@"
    fi
    
    tjc's avatar
    tjc committed
    
    #if [ $# != 1 ]
    #then
    #  perl -w -e "$PERL_PROG" "$@"
    #else
    #  perl -w -e "$PERL_PROG_FASTA" "$@"
    #fi
    
    tjc's avatar
    tjc committed