diff --git a/etc/options b/etc/options index 4cbd6320920113ce57932dd6b99f05729f2ecf9f..1827419bcb27b846b4ad9c966118c22d1175d280 100644 --- a/etc/options +++ b/etc/options @@ -2,7 +2,7 @@ # (Note that comment lines start with a hash (#) symbol) -# $Header: //tmp/pathsoft/artemis/etc/options,v 1.3 2004-06-10 10:38:57 tjc Exp $ +# $Header: //tmp/pathsoft/artemis/etc/options,v 1.4 2004-06-10 13:32:57 tjc Exp $ # This file should contain option settings that look like this: # @@ -487,7 +487,7 @@ feature_protein_programs = \ hth - \ smart - \ clustalx PROTEIN \ - jalview - + jalview PROTEIN feature_dna_programs = \ tblastx embl_other \ @@ -497,6 +497,7 @@ feature_dna_programs = \ clustalx DNA application_programs = \ + jalview # this is the list of keys that should be displayed by default in the edit # window diff --git a/etc/run_jalview b/etc/run_jalview index bf214db58819f454661629c01ba0889c5e46bdb9..dd8e783839b2cee93235c8a6b7ca46ff51bb42fd 100755 --- a/etc/run_jalview +++ b/etc/run_jalview @@ -48,4 +48,170 @@ if (-e $file) } ' -perl -w -e "$PERL_PROG" "$@" +PERL_PROG_FASTA=' + +use strict; + +if (@ARGV != 1) +{ + die "$0 needs one argument - a file of feature file names\n"; +} + + +sub process_fasta +{ + my $feature_file = shift; + my $fasta_results_file = shift; + + my $sequence_file; + + if ($fasta_results_file =~ /(.*).out/) { + $sequence_file = $1; + } else { + die "cannot understand $fasta_results_file\n"; + } + + if (! -e $sequence_file) { + die "cannot find $sequence_file\n"; + } + + my $fasta_seq_for_alignment = ""; + + open SEQ_FILE, $sequence_file or die "cannot open $sequence_file: $!\n"; + + while (<SEQ_FILE>) { + $fasta_seq_for_alignment .= $_; + } + + close SEQ_FILE; + + if (-e $fasta_results_file) { + open FASTA_OUTPUT, $fasta_results_file + or die "cannot open $fasta_results_file: $!\n"; + } else { + my $gzip_fasta_results_file = $fasta_results_file . ".gz"; + if (-e $gzip_fasta_results_file) { + open FASTA_OUTPUT, "gzip -d < $gzip_fasta_results_file |" + or die "cannot open $gzip_fasta_results_file: $!\n"; + } else { + die "cannot find $fasta_results_file or $gzip_fasta_results_file\n"; + } + } + + + my $top_re = "^The best scores are:"; + + my $seen_top = 0; + my $seen_bottom = 0; + + my @protein_ids = (); + + while (<FASTA_OUTPUT>) { + if (/$top_re/) { + $seen_top = 1; + next; + } + + if ($seen_top && /^\s*$/) { + $seen_bottom = 1; + next; + } + + if ($seen_top && !$seen_bottom) { + if (/^(\S+)/) { + if (@protein_ids < 20) { + push @protein_ids, "$1" + } else { + last; + } + } else { + warn "cannot understand this line:\n$_\n"; + } + } + } + + my %hash = (); + + @hash{@protein_ids} = (1) x @protein_ids; + + @protein_ids = sort keys %hash; + + my $protein_db = "swall"; + + # look for each of the IDs from the FASTA output in each of the DBs + + for my $id (@protein_ids) { + my $fetch = "getz -sf fasta -f seq [swall-id:$id]"; + + my $temp_seq = ""; + + open FETCH, "$fetch |" or + die "cannot open pipe to $fetch: $!\n"; + + while (<FETCH>) { + $temp_seq .= $_; + } + + close FETCH; + + if ($? == 0) { + $fasta_seq_for_alignment .= $temp_seq; + } else { + print STDERR "$id was not found in $protein_db\n"; + } + } + + my $msf_file = "$feature_file.fasta_msf"; + + my $emboss_prog = "emma"; + + my $emboss_command_line = "$emboss_prog -filter -stdout -osf msf -dendoutfile /dev/null > $msf_file"; + + open EMBOSS, "|$emboss_command_line" or + die "cannot open pipe to $emboss_prog: $!\n"; + + print EMBOSS $fasta_seq_for_alignment; + + close EMBOSS; + + my $jalview_prog = "jalview"; + + print STDERR "\nstarting $jalview_prog:\n"; + + system "$jalview_prog", "$msf_file"; +} + + +my $file; + +while (defined ($file = <>)) { + chomp $file; + + if (-e $file) { + open IN_FILE, "$file\n" or die "cannot open $file\n"; + + my $line; + + while (defined ($line = <IN_FILE>)) { + if ($line =~ m!/fasta_file="(.*)"!) { + my $fasta_results_file = $1; + + process_fasta $file, $fasta_results_file; + + last; + } + } + + close IN_FILE; + } +}' + + +if [ $# != 1 ] +then + perl -w -e "$PERL_PROG" "$@" +else + perl -w -e "$PERL_PROG_FASTA" "$@" +fi + +