Commit 538dec87bcf488605c35aa8fd2e872326adff65d

Authored by Luis Arturo Medrano-Soto
1 parent bbf0ef5765
Exists in master

updated scripts to run with new versions of quod and extractTCDB

Showing 3 changed files with 56 additions and 28 deletions (side-by-side diff)

alignSeqsFiles.pl View file @ 538dec8
... ... @@ -482,7 +482,7 @@
482 482 my $ylimStr = "--ylim -3 3";
483 483  
484 484  
485   - #Note alnquod requires to add the extension to the image name
  485 + #Plot alignment
486 486 my $alnFig = "$plotsDir/${q}_vs_${s}_qs${qs}_qe${qe}_ss${ss}_se${se}.png";
487 487 my $cmd1 = qq(quod.py -q $labelsStr -l "$q (red) and $s (blue)" -o $alnFig $ylimStr --edgecolor +0:red +1:blue --facecolor +0:orange +1:cyan --multi frag -- $qalnFile $seqDir/${q}.faa $salnFile $seqDir/${s}.faa);
488 488 # print "$cmd1\n\n";
... ... @@ -673,7 +673,7 @@
673 673  
674 674 print "\nRunning hmmscan and parsing output....\n";
675 675  
676   - my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/pfam/pfamdb/Pfam-A.hmm";
  676 + my $pfamDB = ($ENV{PFAMDB})? $ENV{PFAMDB} : "$ENV{RESEARCH_DATA}/DB/domainDBs/xfamDB/Pfam-A.hmm";
677 677 my $cmd2 = qq(hmmscan --cpu 4 --noali --cut_ga -o /dev/null --domtblout $pfamFile $pfamDB $topHitsSeqs);
678 678 system $cmd2 unless (-f $pfamFile && !(-z $pfamFile));
679 679  
locateFragment.pl View file @ 538dec8
... ... @@ -44,9 +44,9 @@
44 44  
45 45 my $fragment = undef;
46 46 my $accession = undef;
47   -my $accFile = undef
  47 +my $accFile = undef;
48 48 my $outdir = undef;
49   -my $blastdb = undef;
  49 +my $blastdb = "uniref90";
50 50 my $evalue = 1e-2;
51 51 my $subMatrix = 'BL50';
52 52 my $quiet = 0;
53 53  
... ... @@ -154,11 +154,11 @@
154 154 }
155 155  
156 156 my $outPlot = "$outdir/${accession}_map_frag.png";
157   - my $qstring = ($quiet)? "-q -o $outPlot" : "-o $outPlot";
158   - my $iString = ($interactive)? "-o $outPlot --show" : "";
  157 + my $qstring = ($quiet)? "-q" : "";
  158 + my $iString = "-o $outPlot";
159 159  
160   - my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence);
161   - print "$cmd\n";
  160 + my $cmd = qq(quod.py $qstring $iString -l "$accession ($coords)" --xticks 25 --grid $regions -- $sequence 2>/dev/null);
  161 + #print "$cmd\n";
162 162 system $cmd;
163 163 }
164 164  
... ... @@ -175,7 +175,7 @@
175 175  
176 176  
177 177 #Sequence for full protein
178   - my $accSeq = (-f $accFile)? $accFile : "$outdir/${acc}.faa";
  178 + my $accSeq = ($accFile && -f $accFile)? $accFile : "$outdir/${acc}.faa";
179 179  
180 180  
181 181 #Save fragment to file
... ... @@ -201,7 +201,7 @@
201 201 system $cmd;
202 202  
203 203 #Remove the version and annotations from the sequence file
204   - my $cmd2 = qq(perl -i.bkp -pe 's/^\>(\w+)\.\*/\>\$1/;' $accSeq);
  204 + my $cmd2 = qq(perl -i -pe 's/^\>(\w+)\.\*/\>\$1/;' $accSeq);
205 205 system $cmd2 unless (-f "${accSeq}.pkp");
206 206 }
207 207  
... ... @@ -274,7 +274,7 @@
274 274 my ($opt, $value) = @_;
275 275  
276 276 #Remove version number if any
277   - $value =~ s/\.\d+$//;
  277 + #$value =~ s/\.\d+$//;
278 278  
279 279 $accession = $value;
280 280 }
281 281  
282 282  
... ... @@ -300,14 +300,31 @@
300 300 sub read_blastdb {
301 301 my ($opt, $value) = @_;
302 302  
303   - my $tmpFile = "${value}.pin";
304   -
305   - unless (-f $tmpFile && !(-z $tmpFile)) {
306   - die "Error in option -bdb: Blast DB does not exist! -> $value";
  303 + #In case the user provided the whole path to the blastDB.
  304 + my $exists1 = qx(ls ${value}*.pin 2>/dev/null);
  305 + if ($exists1) {
  306 + $blastdb = $value;
  307 + return;
307 308 }
308 309  
309   - $blastdb = $value;
310 310  
  311 + #In case the user provided only the blastDB name
  312 + my @paths = split(/:/, $ENV{BLASTDB});
  313 + my $found = 0;
  314 +
  315 + foreach my $dir (@paths) {
  316 + my $exists2 = qx(ls $dir/${value}*.pin 2>/dev/null);
  317 + if ($exists2) {
  318 + $blastdb = "$dir/$value";
  319 + $found = 1;
  320 + last;
  321 + }
  322 + }
  323 +
  324 + #Report an error if BlastDB was not found
  325 + unless ($found) {
  326 + die "Error in option -bdb: BlastDB not found! -> $value";
  327 + }
311 328 }
312 329  
313 330  
... ... @@ -354,8 +371,8 @@
354 371 -o, --outdir {PATH} (Optional. Default: ./)
355 372 Path to output directory.
356 373  
357   --bdb {PATH} (Optional. Default: nr)
358   - Path to the BLAST database where accessions will be extracted from.
  374 +-bdb {PATH} (Optional. Default: uniref90)
  375 + Path or name of the BLAST database where sequences will be extracted from.
359 376  
360 377 -e, --evalue {FLOAT} (Optional. Default: 0.01)
361 378 E-value cut off when comparing full proteins
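(third changed file: a shell wrapper script; its file name was not captured in this view) View file @ 538dec8
... ... @@ -4,12 +4,13 @@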
4 4 then
5 5 echo "Locate aligned fragments within 2 full proteins"
6 6 echo "Arguments:"
7   - echo " 1. NCBI Accession of first protein 1"
  7 + echo " 1. Accession of protein 1"
8 8 echo " 2. Aligned fragment of protein 1"
9   - echo " 3. NCBI Accession of first protein 2"
  9 + echo " 3. Accession of protein 2"
10 10 echo " 4. Aligned fragment of protein 2"
11   - echo " 5. (Optional) substitution matrix to use. (Defaul: BL50)"
12   - echo " 6. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)"
  11 + echo " 5. (optional) blast DB to extract sequences from (Default: uniref90)"
  12 + echo " 6. (Optional) substitution matrix to use. (Defaul: BL50)"
  13 + echo " 7. (Optional) Indicate whether plots will be shown (Values: show/quiet; Default: show)"
13 14 exit 1
14 15 fi
15 16  
16 17  
17 18  
18 19  
19 20  
20 21  
21 22  
... ... @@ -18,23 +19,33 @@
18 19 #Define the substitution matrix to work with
19 20 mat="BL50"
20 21 mode=""
  22 +db="uniref90"
21 23  
  24 +#Identify the blast DB to extract sequences from
  25 +if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]] && [[ "$5" != "uniref90" ]])
  26 +then
  27 + db="$5"
  28 +fi
22 29  
23 30 #Identify the type of substitution matrix, if given
24   -if [[ ! -z "$5" ]] && ([[ "$5" != "quiet" ]] && [[ "$5" != "show" ]])
  31 +if [[ ! -z "$6" ]] && ([[ "$6" != "quiet" ]] && [[ "$6" != "show" ]] && [[ "$6" != "uniref90" ]])
25 32 then
26   - mat=$5
  33 + mat="$6"
27 34 fi
28 35  
29   -
30 36 #Check the mode of operation: quiet/show
31   -if [[ "$5" == "quiet" ]] || [[ "$6" == "quiet" ]]
  37 +if [[ "$6" == "quiet" ]] || [[ "$7" == "quiet" ]]
32 38 then
33 39 mode="-q"
34 40 fi
35 41  
36   -locateFragment.pl -a $1 -f $2 $mode
37   -locateFragment.pl -a $3 -f $4 $mode
  42 +
  43 +
  44 +#localizing fragments
  45 +locateFragment.pl -a $1 -f $2 $mode -bdb $db
  46 +locateFragment.pl -a $3 -f $4 $mode -bdb $db
  47 +
  48 +#Aligning fragments and full sequences
38 49 alignSeqsFiles.pl -q $1_frag.faa -ql $1_frag -s $3_frag.faa -sl $3_frag -e 0.1 -c 20 -cc X -m $mat
39 50 alignSeqsFiles.pl -q $1.faa -ql $1 -s $3.faa -sl $3 -e 0.1 -c 5 -cc X -m $mat
40 51