From 538dec87bcf488605c35aa8fd2e872326adff65d Mon Sep 17 00:00:00 2001 From: Arturo Medrano Date: Mon, 17 Jul 2023 17:43:50 -0700 Subject: [PATCH] updated scripts to run with new versions of quod and extractTCDB --- alignSeqsFiles.pl | 4 ++-- locateFragment.pl | 49 +++++++++++++++++++++++++++++++++---------------- locfrag.sh | 31 +++++++++++++++++++++---------- 3 files changed, 56 insertions(+), 28 deletions(-) diff --git a/alignSeqsFiles.pl b/alignSeqsFiles.pl index fdb2327..d1dc104 100755 --- a/alignSeqsFiles.pl +++ b/alignSeqsFiles.pl @@ -482,7 +482,7 @@ sub run_quod { my $ylimStr = "--ylim -3 3"; - #Note alnquod requires to add the extension to the image name + #Plot alignment my $alnFig = "$plotsDir/${q}_vs_${s}_qs${qs}_qe${qe}_ss${ss}_se${se}.png"; my $cmd1 = qq(quod.py -q $labelsStr -l "$q (red) and $s (blue)" -o $alnFig $ylimStr --edgecolor +0:red +1:blue --facecolor +0:orange +1:cyan --multi frag -- $qalnFile $seqDir/${q}.faa $salnFile $seqDir/${s}.faa); # print "$cmd1\n\n"; @@ -673,7 +673,7 @@ sub run_pfam_hmmtop { print "\nRunning hmmscan and parsing output....\n"; - my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/pfam/pfamdb/Pfam-A.hmm"; + my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/DB/domainDBs/xfamDB/Pfam-A.hmm"; my $cmd2 = qq(hmmscan --cpu 4 --noali --cut_ga -o /dev/null --domtblout $pfamFile $pfamDB $topHitsSeqs); system $cmd2 unless (-f $pfamFile && !(-z $pfamFile)); diff --git a/locateFragment.pl b/locateFragment.pl index 462edc5..db21cf9 100755 --- a/locateFragment.pl +++ b/locateFragment.pl @@ -44,9 +44,9 @@ $CheckDep_obj -> checkDependencies; my $fragment = undef; my $accession = undef; -my $accFile = undef +my $accFile = undef; my $outdir = undef; -my $blastdb = undef; +my $blastdb = "uniref90"; my $evalue = 1e-2; my $subMatrix = 'BL50'; my $quiet = 0; @@ -154,11 +154,11 @@ sub run_quod { } my $outPlot = "$outdir/${accession}_map_frag.png"; - my $qstring = ($quiet)? "-q -o $outPlot" : "-o $outPlot"; - my $iString = ($interactive)? "-o $outPlot --show" : ""; + my $qstring = ($quiet)? "-q" : ""; + my $iString = "-o $outPlot"; - my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence); - print "$cmd\n"; + my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence 2>/dev/null); + #print "$cmd\n"; system $cmd; } @@ -175,7 +175,7 @@ sub getSequences { #Sequence for full protein - my $accSeq = (-f $accFile)? $accFile : "$outdir/${acc}.faa"; + my $accSeq = ($accFile && -f $accFile)? $accFile : "$outdir/${acc}.faa"; #Save fragment to file @@ -201,7 +201,7 @@ sub getSequences { system $cmd; #Remove the version and annotations from the sequence file - my $cmd2 = qq(perl -i.bkp -pe 's/^\\>(\\w+)\.\*/\\>\$1/;' $accSeq); + my $cmd2 = qq(perl -i -pe 's/^\\>(\\w+)\.\*/\\>\$1/;' $accSeq); system $cmd2 unless (-f "${accSeq}.pkp"); } @@ -274,7 +274,7 @@ sub read_accession { my ($opt, $value) = @_; #Remove version number if any - $value =~ s/\.\d+$//; + #$value =~ s/\.\d+$//; $accession = $value; } @@ -300,14 +300,31 @@ sub read_fragment { sub read_blastdb { my ($opt, $value) = @_; - my $tmpFile = "${value}.pin"; - - unless (-f $tmpFile && !(-z $tmpFile)) { - die "Error in option -bdb: Blast DB does not exist! -> $value"; + #In case the user provided the whole path to the blastDB. + my $exists1 = qx(ls ${value}*.pin 2>/dev/null); + if ($exists1) { + $blastdb = $value; + return; } - $blastdb = $value; + #In case the user provided only the blastDB name + my @paths = split(/:/, $ENV{BLASTDB}); + my $found = 0; + + foreach my $dir (@paths) { + my $exists2 = qx(ls $dir/${value}*.pin 2>/dev/null); + if ($exists2) { + $blastdb = "$dir/$value"; + $found = 1; + last; + } + } + + #Report an error if BlastDB was not found + unless ($found) { + die "Error in option -bdb: BlastDB not found! -> $value"; + } } @@ -354,8 +371,8 @@ Options -o, --outdir {PATH} (Optional. Default: ./) Path to output directory. --bdb {PATH} (Optional. Default: nr) - Path to the BLAST database where accessions will be extracted from. +-bdb {PATH} (Optional. Default: uniref90) + Path or name of the BLAST database where sequences will be extracted from. -e, --evalue {FLOAT} (Optional. Default: 0.01) E-value cut off when comparing full proteins diff --git a/locfrag.sh b/locfrag.sh index 4d06835..d7002ee 100755 --- a/locfrag.sh +++ b/locfrag.sh @@ -4,12 +4,13 @@ if [ "$1" = "-h" ] then echo "Locate aligned fragments within 2 full proteins" echo "Arguments:" - echo " 1. NCBI Accession of first protein 1" + echo " 1. Accession of protein 1" echo " 2. Aligned fragment of protein 1" - echo " 3. NCBI Accession of first protein 2" + echo " 3. Accession of protein 2" echo " 4. Aligned fragment of protein 2" - echo " 5. (Optional) substitution matrix to use. (Defaul: BL50)" - echo " 6. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)" + echo " 5. (optional) blast DB to extract sequences from (Default: uniref90)" + echo " 6. (Optional) substitution matrix to use. (Defaul: BL50)" + echo " 7. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)" exit 1 fi @@ -18,23 +19,33 @@ fi #Define the substitution matrix to work with mat="BL50" mode="" +db="uniref90" +#Identify the blast DB to extract sequences from +if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]] && [[ "$5" != "uniref90" ]]) +then + db="$5" +fi #Identify the type of substitution matrix, if given -if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]]) +if [[ ! -z "$6" ]] && ([[ "$6" != "quiet" ]] && [[ "$6" != "show" ]] && [[ "$6" != "uniref90" ]]) then - mat=$5 + mat="$6" fi - #Check the mode of operation: quiet/show -if [[ "$5" == "quiet" ]] || [[ "$6" == "quiet" ]] +if [[ "$6" == "quiet" ]] || [[ "$7" == "quiet" ]] then mode="-q" fi -locateFragment.pl -a $1 -f $2 $mode -locateFragment.pl -a $3 -f $4 $mode + + +#localizing fragments +locateFragment.pl -a $1 -f $2 $mode -bdb $db +locateFragment.pl -a $3 -f $4 $mode -bdb $db + +#Aligning fragments and full sequences alignSeqsFiles.pl -q $1_frag.faa -ql $1_frag -s $3_frag.faa -sl $3_frag -e 0.1 -c 20 -cc X -m $mat alignSeqsFiles.pl -q $1.faa -ql $1 -s $3.faa -sl $3 -e 0.1 -c 5 -cc X -m $mat -- 1.9.1