Commit 538dec87bcf488605c35aa8fd2e872326adff65d
1 parent
bbf0ef5765
Exists in
master
updated scripts to run with new versions of quod and extractTCDB
Showing 3 changed files with 56 additions and 28 deletions (side-by-side diff)
alignSeqsFiles.pl
View file @
538dec8
... | ... | @@ -482,7 +482,7 @@ |
482 | 482 | my $ylimStr = "--ylim -3 3"; |
483 | 483 | |
484 | 484 | |
485 | - #Note alnquod requires to add the extension to the image name | |
485 | + #Plot alignment | |
486 | 486 | my $alnFig = "$plotsDir/${q}_vs_${s}_qs${qs}_qe${qe}_ss${ss}_se${se}.png"; |
487 | 487 | my $cmd1 = qq(quod.py -q $labelsStr -l "$q (red) and $s (blue)" -o $alnFig $ylimStr --edgecolor +0:red +1:blue --facecolor +0:orange +1:cyan --multi frag -- $qalnFile $seqDir/${q}.faa $salnFile $seqDir/${s}.faa); |
488 | 488 | # print "$cmd1\n\n"; |
... | ... | @@ -673,7 +673,7 @@ |
673 | 673 | |
674 | 674 | print "\nRunning hmmscan and parsing output....\n"; |
675 | 675 | |
676 | - my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/pfam/pfamdb/Pfam-A.hmm"; | |
676 | + my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/DB/domainDBs/xfamDB/Pfam-A.hmm"; | |
677 | 677 | my $cmd2 = qq(hmmscan --cpu 4 --noali --cut_ga -o /dev/null --domtblout $pfamFile $pfamDB $topHitsSeqs); |
678 | 678 | system $cmd2 unless (-f $pfamFile && !(-z $pfamFile)); |
679 | 679 |
locateFragment.pl
View file @
538dec8
... | ... | @@ -44,9 +44,9 @@ |
44 | 44 | |
45 | 45 | my $fragment = undef; |
46 | 46 | my $accession = undef; |
47 | -my $accFile = undef | |
47 | +my $accFile = undef; | |
48 | 48 | my $outdir = undef; |
49 | -my $blastdb = undef; | |
49 | +my $blastdb = "uniref90"; | |
50 | 50 | my $evalue = 1e-2; |
51 | 51 | my $subMatrix = 'BL50'; |
52 | 52 | my $quiet = 0; |
53 | 53 | |
... | ... | @@ -154,11 +154,11 @@ |
154 | 154 | } |
155 | 155 | |
156 | 156 | my $outPlot = "$outdir/${accession}_map_frag.png"; |
157 | - my $qstring = ($quiet)? "-q -o $outPlot" : "-o $outPlot"; | |
158 | - my $iString = ($interactive)? "-o $outPlot --show" : ""; | |
157 | + my $qstring = ($quiet)? "-q" : ""; | |
158 | + my $iString = "-o $outPlot"; | |
159 | 159 | |
160 | - my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence); | |
161 | - print "$cmd\n"; | |
160 | + my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence 2>/dev/null); | |
161 | + #print "$cmd\n"; | |
162 | 162 | system $cmd; |
163 | 163 | } |
164 | 164 | |
... | ... | @@ -175,7 +175,7 @@ |
175 | 175 | |
176 | 176 | |
177 | 177 | #Sequence for full protein |
178 | - my $accSeq = (-f $accFile)? $accFile : "$outdir/${acc}.faa"; | |
178 | + my $accSeq = ($accFile && -f $accFile)? $accFile : "$outdir/${acc}.faa"; | |
179 | 179 | |
180 | 180 | |
181 | 181 | #Save fragment to file |
... | ... | @@ -201,7 +201,7 @@ |
201 | 201 | system $cmd; |
202 | 202 | |
203 | 203 | #Remove the version and annotations from the sequence file |
204 | - my $cmd2 = qq(perl -i.bkp -pe 's/^\>(\w+)\.\*/\>\$1/;' $accSeq); | |
204 | + my $cmd2 = qq(perl -i -pe 's/^\>(\w+)\.\*/\>\$1/;' $accSeq); | |
205 | 205 | system $cmd2 unless (-f "${accSeq}.pkp"); |
206 | 206 | } |
207 | 207 | |
... | ... | @@ -274,7 +274,7 @@ |
274 | 274 | my ($opt, $value) = @_; |
275 | 275 | |
276 | 276 | #Remove version number if any |
277 | - $value =~ s/\.\d+$//; | |
277 | + #$value =~ s/\.\d+$//; | |
278 | 278 | |
279 | 279 | $accession = $value; |
280 | 280 | } |
281 | 281 | |
282 | 282 | |
... | ... | @@ -300,14 +300,31 @@ |
300 | 300 | sub read_blastdb { |
301 | 301 | my ($opt, $value) = @_; |
302 | 302 | |
303 | - my $tmpFile = "${value}.pin"; | |
304 | - | |
305 | - unless (-f $tmpFile && !(-z $tmpFile)) { | |
306 | - die "Error in option -bdb: Blast DB does not exist! -> $value"; | |
303 | + #In case the user provided the whole path to the blastDB. | |
304 | + my $exists1 = qx(ls ${value}*.pin 2>/dev/null); | |
305 | + if ($exists1) { | |
306 | + $blastdb = $value; | |
307 | + return; | |
307 | 308 | } |
308 | 309 | |
309 | - $blastdb = $value; | |
310 | 310 | |
311 | + #In case the user provided only the blastDB name | |
312 | + my @paths = split(/:/, $ENV{BLASTDB}); | |
313 | + my $found = 0; | |
314 | + | |
315 | + foreach my $dir (@paths) { | |
316 | + my $exists2 = qx(ls $dir/${value}*.pin 2>/dev/null); | |
317 | + if ($exists2) { | |
318 | + $blastdb = "$dir/$value"; | |
319 | + $found = 1; | |
320 | + last; | |
321 | + } | |
322 | + } | |
323 | + | |
324 | + #Report an error if BlastDB was not found | |
325 | + unless ($found) { | |
326 | + die "Error in option -bdb: BlastDB not found! -> $value"; | |
327 | + } | |
311 | 328 | } |
312 | 329 | |
313 | 330 | |
... | ... | @@ -354,8 +371,8 @@ |
354 | 371 | -o, --outdir {PATH} (Optional. Default: ./) |
355 | 372 | Path to output directory. |
356 | 373 | |
357 | --bdb {PATH} (Optional. Default: nr) | |
358 | - Path to the BLAST database where accessions will be extracted from. | |
374 | +-bdb {PATH} (Optional. Default: uniref90) | |
375 | + Path or name of the BLAST database where sequences will be extracted from. | |
359 | 376 | |
360 | 377 | -e, --evalue {FLOAT} (Optional. Default: 0.01) |
361 | 378 | E-value cut off when comparing full proteins |
locfrag.sh
View file @
538dec8
... | ... | @@ -4,12 +4,13 @@ |
4 | 4 | then |
5 | 5 | echo "Locate aligned fragments within 2 full proteins" |
6 | 6 | echo "Arguments:" |
7 | - echo " 1. NCBI Accession of first protein 1" | |
7 | + echo " 1. Accession of protein 1" | |
8 | 8 | echo " 2. Aligned fragment of protein 1" |
9 | - echo " 3. NCBI Accession of first protein 2" | |
9 | + echo " 3. Accession of protein 2" | |
10 | 10 | echo " 4. Aligned fragment of protein 2" |
11 | - echo " 5. (Optional) substitution matrix to use. (Defaul: BL50)" | |
12 | - echo " 6. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)" | |
11 | + echo " 5. (optional) blast DB to extract sequences from (Default: uniref90)" | |
12 | + echo " 6. (Optional) substitution matrix to use. (Defaul: BL50)" | |
13 | + echo " 7. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)" | |
13 | 14 | exit 1 |
14 | 15 | fi |
15 | 16 | |
16 | 17 | |
17 | 18 | |
18 | 19 | |
19 | 20 | |
20 | 21 | |
21 | 22 | |
... | ... | @@ -18,23 +19,33 @@ |
18 | 19 | #Define the substitution matrix to work with |
19 | 20 | mat="BL50" |
20 | 21 | mode="" |
22 | +db="uniref90" | |
21 | 23 | |
24 | +#Identify the blast DB to extract sequences from | |
25 | +if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]] && [[ "$5" != "uniref90" ]]) | |
26 | +then | |
27 | + db="$5" | |
28 | +fi | |
22 | 29 | |
23 | 30 | #Identify the type of substitution matrix, if given |
24 | -if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]]) | |
31 | +if [[ ! -z "$6" ]] && ([[ "$6" != "quiet" ]] && [[ "$6" != "show" ]] && [[ "$6" != "uniref90" ]]) | |
25 | 32 | then |
26 | - mat=$5 | |
33 | + mat="$6" | |
27 | 34 | fi |
28 | 35 | |
29 | - | |
30 | 36 | #Check the mode of operation: quiet/show |
31 | -if [[ "$5" == "quiet" ]] || [[ "$6" == "quiet" ]] | |
37 | +if [[ "$6" == "quiet" ]] || [[ "$7" == "quiet" ]] | |
32 | 38 | then |
33 | 39 | mode="-q" |
34 | 40 | fi |
35 | 41 | |
36 | -locateFragment.pl -a $1 -f $2 $mode | |
37 | -locateFragment.pl -a $3 -f $4 $mode | |
42 | + | |
43 | + | |
44 | +#localizing fragments | |
45 | +locateFragment.pl -a $1 -f $2 $mode -bdb $db | |
46 | +locateFragment.pl -a $3 -f $4 $mode -bdb $db | |
47 | + | |
48 | +#Aligning fragments and full sequences | |
38 | 49 | alignSeqsFiles.pl -q $1_frag.faa -ql $1_frag -s $3_frag.faa -sl $3_frag -e 0.1 -c 20 -cc X -m $mat |
39 | 50 | alignSeqsFiles.pl -q $1.faa -ql $1 -s $3.faa -sl $3 -e 0.1 -c 5 -cc X -m $mat |
40 | 51 |