Commit 9f332056e88eedaa57bcc71267eb489bd80226c2

Authored by Luis Arturo Medrano-Soto
1 parent b344449ca4
Exists in master

Fixed annoying bug where the origial input file(s) to getDomainTopology.pl was(were) modified

Showing 1 changed file with 11 additions and 3 deletions Inline Diff

getDomainTopology.pl View file @ 9f33205
#!/usr/bin/env perl -w 1 1 #!/usr/bin/env perl -w
2 2
use strict; 3 3 use strict;
use warnings; 4 4 use warnings;
use Data::Dumper; 5 5 use Data::Dumper;
#use List::Util qw(sum); 6 6 #use List::Util qw(sum);
7 7
use TCDB::Assorted; 8 8 use TCDB::Assorted;
use TCDB::Domain::PfamParser; 9 9 use TCDB::Domain::PfamParser;
use TCDB::Domain::Characterize; 10 10 use TCDB::Domain::Characterize;
11 11
use Getopt::Long; 12 12 use Getopt::Long;
13 13
#========================================================================== 14 14 #==========================================================================
#Global variables 15 15 #Global variables
16 16
#Query family or families 17 17 #Query family or families
my @fams = (); 18 18 my @fams = ();
19 19
#This is an option for TCDB::Assorted::getSystemAccessions() 20 20 #This is an option for TCDB::Assorted::getSystemAccessions()
my $treatAsSuperfamily = 0; 21 21 my $treatAsSuperfamily = 0;
22 22
#Options for TCDB::Domain::PfamParser 23 23 #Options for TCDB::Domain::PfamParser
my $domain_cov = 0.7; 24 24 my $domain_cov = 0.7;
my $prot_cov = 0.1; 25 25 my $prot_cov = 0.1;
my $evalue = 1e-5; 26 26 my $evalue = 1e-5;
my $prop_prots_w_domain = 0.05; 27 27 my $prop_prots_w_domain = 0.05;
28 28
#Options for TCDB::Domain::Characterize 29 29 #Options for TCDB::Domain::Characterize
my $rootDir = "."; 30 30 my $rootDir = ".";
31 31
#To extract the TCIDs of refernece families 32 32 #To extract the TCIDs of refernece families
my $tcdbSeqsFile = "$ENV{RESEARCH_DATA}/pfam/download/tcdb.faa"; 33 33 my $tcdbSeqsFile = "$ENV{RESEARCH_DATA}/pfam/download/tcdb.faa";
my $pfamFile = "$ENV{RESEARCH_DATA}/pfam/tcdb.pfam-a.hmmscan.bz2"; 34 34 my $pfamFile = "$ENV{RESEARCH_DATA}/pfam/tcdb.pfam-a.hmmscan.bz2";
my $blastdb = "$ENV{HOME}/db/blastdb/tcdb"; 35 35 my $blastdb = "$ENV{HOME}/db/blastdb/tcdb";
my $prog = "ssearch36"; 36 36 my $prog = "ssearch36";
my @candProjProts = (); 37 37 my @candProjProts = ();
my $analysisLevel = 'system'; 38 38 my $analysisLevel = 'system';
39 39
40 40
41 41
#Read command line topology 42 42 #Read command line topology
read_command_line_arguments(); 43 43 read_command_line_arguments();
44 44
45 45
die "TCDB sequences file not found or empty --> $tcdbSeqsFile\n" unless (-f $tcdbSeqsFile && !(-z $tcdbSeqsFile)); 46 46 die "TCDB sequences file not found or empty --> $tcdbSeqsFile\n" unless (-f $tcdbSeqsFile && !(-z $tcdbSeqsFile));
die "TCDB hmmscan output file not found --> $pfamFile\n" unless (-f $pfamFile && !(-z $pfamFile)); 47 47 die "TCDB hmmscan output file not found --> $pfamFile\n" unless (-f $pfamFile && !(-z $pfamFile));
48 48
49 49
#print Data::Dumper->Dump([\@fams, $treatAsSuperfamily, $rootDir, $tcdbSeqsFile, $pfamFile, $blastdb, $prog, $domain_cov, 50 50 #print Data::Dumper->Dump([\@fams, $treatAsSuperfamily, $rootDir, $tcdbSeqsFile, $pfamFile, $blastdb, $prog, $domain_cov,
# $prot_cov, $evalue, $prop_prots_w_domain, \@candProjProts], 51 51 # $prot_cov, $evalue, $prop_prots_w_domain, \@candProjProts],
# [qw(*fams *treatAsSuperfamily *rootDir *tcdbSeqsFile *pfamFile *blastdb *prog *domain_cov 52 52 # [qw(*fams *treatAsSuperfamily *rootDir *tcdbSeqsFile *pfamFile *blastdb *prog *domain_cov
# *prot_cov *evalue *prop_prots_w_domain *candProjProts)]); 53 53 # *prot_cov *evalue *prop_prots_w_domain *candProjProts)]);
#exit; 54 54 #exit;
55 55
56 56
#========================================================================== 57 57 #==========================================================================
#Split tcdb systems into single-component multi-component 58 58 #Split tcdb systems into single-component multi-component
59 59
60 60
if ($treatAsSuperfamily) { 61 61 if ($treatAsSuperfamily) {
62 62
my $tcids = getSystemAccessions($tcdbSeqsFile, 'both', $analysisLevel, \@fams, $treatAsSuperfamily); 63 63 my $tcids = getSystemAccessions($tcdbSeqsFile, 'both', $analysisLevel, \@fams, $treatAsSuperfamily);
64 64
# print Data::Dumper->Dump([$tcids ], [qw(*tcids )]); 65 65 # print Data::Dumper->Dump([$tcids ], [qw(*tcids )]);
# exit; 66 66 # exit;
67 67
68 68
69 69
#========================================================================== 70 70 #==========================================================================
#Setup the thresholds for parsing the PFAM output 71 71 #Setup the thresholds for parsing the PFAM output
72 72
73 73
my $obj = new TCDB::Domain::PfamParser(); 74 74 my $obj = new TCDB::Domain::PfamParser();
$obj->pfamFile($pfamFile); 75 75 $obj->pfamFile($pfamFile);
$obj->analysisLevel($analysisLevel); 76 76 $obj->analysisLevel($analysisLevel);
$obj->domCovCutoff($domain_cov); 77 77 $obj->domCovCutoff($domain_cov);
$obj->tcCovCutoff($prot_cov); 78 78 $obj->tcCovCutoff($prot_cov);
$obj->evalueCutoff($evalue); 79 79 $obj->evalueCutoff($evalue);
$obj->minProtsDom($prop_prots_w_domain); 80 80 $obj->minProtsDom($prop_prots_w_domain);
$obj->treatAsSuperfamily($treatAsSuperfamily); 81 81 $obj->treatAsSuperfamily($treatAsSuperfamily);
82 82
83 83
my %domFreq = (); 84 84 my %domFreq = ();
my %domCoords = (); 85 85 my %domCoords = ();
$obj->getDomainStatsForUserFamilies(\@fams, $tcids, \%domFreq, \%domCoords); 86 86 $obj->getDomainStatsForUserFamilies(\@fams, $tcids, \%domFreq, \%domCoords);
87 87
# print Data::Dumper->Dump([ \%domFreq, \%domCoords ], [qw( *domFreq *domCoords )]); 88 88 # print Data::Dumper->Dump([ \%domFreq, \%domCoords ], [qw( *domFreq *domCoords )]);
# exit; 89 89 # exit;
90 90
91 91
92 92
93 93
94 94
#========================================================================== 95 95 #==========================================================================
#Attempt to rescue the domains that were not recognized by PFAM in some 96 96 #Attempt to rescue the domains that were not recognized by PFAM in some
#Family members 97 97 #Family members
98 98
99 99
my $rescueObj = new TCDB::Domain::Characterize(); 100 100 my $rescueObj = new TCDB::Domain::Characterize();
$rescueObj->rootDir($rootDir); 101 101 $rescueObj->rootDir($rootDir);
$rescueObj->tcdbFaa($tcdbSeqsFile); 102 102 $rescueObj->tcdbFaa($tcdbSeqsFile);
$rescueObj->domCoords(\%domCoords); 103 103 $rescueObj->domCoords(\%domCoords);
$rescueObj->domFreq(\%domFreq); 104 104 $rescueObj->domFreq(\%domFreq);
$rescueObj->tcids($tcids); 105 105 $rescueObj->tcids($tcids);
$rescueObj->searchWith($prog); 106 106 $rescueObj->searchWith($prog);
$rescueObj->blastdb($blastdb); 107 107 $rescueObj->blastdb($blastdb);
$rescueObj->evalue($evalue); 108 108 $rescueObj->evalue($evalue);
$rescueObj->treatAsSuperfamily($treatAsSuperfamily); 109 109 $rescueObj->treatAsSuperfamily($treatAsSuperfamily);
110 110
111 111
$rescueObj->rescueDomains(\@fams); 112 112 $rescueObj->rescueDomains(\@fams);
113 113
} 114 114 }
else { 115 115 else {
116 116
foreach my $fam (@fams) { 117 117 foreach my $fam (@fams) {
118 118
my $tcids = getSystemAccessions($tcdbSeqsFile, 'both', $analysisLevel, [$fam], $treatAsSuperfamily); 119 119 my $tcids = getSystemAccessions($tcdbSeqsFile, 'both', $analysisLevel, [$fam], $treatAsSuperfamily);
120 120
# print Data::Dumper->Dump([$tcids ], [qw( *tcids )]); 121 121 # print Data::Dumper->Dump([$tcids ], [qw( *tcids )]);
# exit; 122 122 # exit;
123 123
124 124
125 125
#========================================================================== 126 126 #==========================================================================
#Setup the thresholds for parsing the PFAM output 127 127 #Setup the thresholds for parsing the PFAM output
128 128
129 129
my $obj = new TCDB::Domain::PfamParser(); 130 130 my $obj = new TCDB::Domain::PfamParser();
$obj->pfamFile($pfamFile); 131 131 $obj->pfamFile($pfamFile);
$obj->analysisLevel($analysisLevel); 132 132 $obj->analysisLevel($analysisLevel);
$obj->domCovCutoff($domain_cov); 133 133 $obj->domCovCutoff($domain_cov);
$obj->tcCovCutoff($prot_cov); 134 134 $obj->tcCovCutoff($prot_cov);
$obj->evalueCutoff($evalue); 135 135 $obj->evalueCutoff($evalue);
$obj->minProtsDom($prop_prots_w_domain); 136 136 $obj->minProtsDom($prop_prots_w_domain);
137 137
138 138
my %domFreq = (); 139 139 my %domFreq = ();
my %domCoords = (); 140 140 my %domCoords = ();
$obj->getDomainStatsForUserFamilies([], $tcids, \%domFreq, \%domCoords); 141 141 $obj->getDomainStatsForUserFamilies([], $tcids, \%domFreq, \%domCoords);
142 142
#print Data::Dumper->Dump([ \%domFreq, \%domCoords ], [qw( *domFreq *domCoords )]); 143 143 #print Data::Dumper->Dump([ \%domFreq, \%domCoords ], [qw( *domFreq *domCoords )]);
#exit; 144 144 #exit;
145 145
146 146
147 147
148 148
149 149
#========================================================================== 150 150 #==========================================================================
#Attempt to rescue the domains that were not recognized by PFAM in some 151 151 #Attempt to rescue the domains that were not recognized by PFAM in some
#Family members 152 152 #Family members
153 153
154 154
my $rescueObj = new TCDB::Domain::Characterize(); 155 155 my $rescueObj = new TCDB::Domain::Characterize();
$rescueObj->rootDir($rootDir); 156 156 $rescueObj->rootDir($rootDir);
$rescueObj->tcdbFaa($tcdbSeqsFile); 157 157 $rescueObj->tcdbFaa($tcdbSeqsFile);
$rescueObj->domCoords(\%domCoords); 158 158 $rescueObj->domCoords(\%domCoords);
$rescueObj->domFreq(\%domFreq); 159 159 $rescueObj->domFreq(\%domFreq);
$rescueObj->tcids($tcids); 160 160 $rescueObj->tcids($tcids);
$rescueObj->searchWith($prog); 161 161 $rescueObj->searchWith($prog);
$rescueObj->blastdb($blastdb); 162 162 $rescueObj->blastdb($blastdb);
$rescueObj->evalue($evalue); 163 163 $rescueObj->evalue($evalue);
$rescueObj->domCovCutoff($domain_cov); 164 164 $rescueObj->domCovCutoff($domain_cov);
$rescueObj->treatAsSuperfamily($treatAsSuperfamily); 165 165 $rescueObj->treatAsSuperfamily($treatAsSuperfamily);
166 166
$rescueObj->rescueDomains(); 167 167 $rescueObj->rescueDomains();
} 168 168 }
} 169 169 }
170 170
171 171
172 172
173 173
########################################################################### 174 174 ###########################################################################
## Functions ## 175 175 ## Functions ##
########################################################################### 176 176 ###########################################################################
177 177
178 178
179 179
sub read_command_line_arguments { 180 180 sub read_command_line_arguments {
181 181
#if no arguments are given print the help 182 182 #if no arguments are given print the help
if (! @ARGV) { 183 183 if (! @ARGV) {
print_help(); 184 184 print_help();
} 185 185 }
186 186
#---------------------------------------------------------------------- 187 187 #----------------------------------------------------------------------
#Parse command line arguments 188 188 #Parse command line arguments
189 189
my $status = GetOptions( 190 190 my $status = GetOptions(
"f|family=s" => \&read_fams, #TCIDs of families to analyze (comma separated) 191 191 "f|family=s" => \&read_fams, #TCIDs of families to analyze (comma separated)
192 192
#Options for TCDB::Domain::PfamParser 193 193 #Options for TCDB::Domain::PfamParser
"dc|domain-cov=f" => \$domain_cov, 194 194 "dc|domain-cov=f" => \$domain_cov,
"pc|protein-cov=f" => \$prot_cov, 195 195 "pc|protein-cov=f" => \$prot_cov,
"e|evalue=f" => \$evalue, 196 196 "e|evalue=f" => \$evalue,
"m|prots-w-domain=f" => \$prop_prots_w_domain, 197 197 "m|prots-w-domain=f" => \$prop_prots_w_domain,
198 198
#Options for TCDB::Domain::Characterize 199 199 #Options for TCDB::Domain::Characterize
"pt|proj-targets=s" => \&read_proj_targets, #Target Proteins, NOT in TCDB, to project domains onto 200 200 "pt|proj-targets=s" => \&read_proj_targets, #Target Proteins, NOT in TCDB, to project domains onto
"o|outdir=s" => \&read_root_dir, #Ouput root directory 201 201 "o|outdir=s" => \&read_root_dir, #Ouput root directory
"s|tcdb-seqs=s" => \&read_tcdb_seqs, #File with all sequences in TCDB 202 202 "s|tcdb-seqs=s" => \&read_tcdb_seqs, #File with all sequences in TCDB
"sf|superfamily!" => \$treatAsSuperfamily, #File with the sequences of the reference family 203 203 "sf|superfamily!" => \$treatAsSuperfamily, #File with the sequences of the reference family
"pfam=s" => \&read_pfam, #hmmscan output file for whole TCDB 204 204 "pfam=s" => \&read_pfam, #hmmscan output file for whole TCDB
"b|blastdb=s" => \&read_blastdb, #Full path of blastdb to extract sequences 205 205 "b|blastdb=s" => \&read_blastdb, #Full path of blastdb to extract sequences
"p|rescue-prog=s" => \&read_prog, #Read the program that will be used to rescue domains (blastp|ssearch36) 206 206 "p|rescue-prog=s" => \&read_prog, #Read the program that will be used to rescue domains (blastp|ssearch36)
"h|help" => sub { print_help(); }, 207 207 "h|help" => sub { print_help(); },
208 208
#For arguments that do not look like valid options 209 209 #For arguments that do not look like valid options
"<>" => sub { die "Error: Unknown argument: $_[0]\n"; } 210 210 "<>" => sub { die "Error: Unknown argument: $_[0]\n"; }
); 211 211 );
die "\n" unless ($status); 212 212 die "\n" unless ($status);
213 213
#---------------------------------------------------------------------- 214 214 #----------------------------------------------------------------------
#Validate command line arguments 215 215 #Validate command line arguments
216 216
217 217
die "Error: Options -f and -pt are incompatible" if (@fams && @candProjProts); 218 218 die "Error: Options -f and -pt are incompatible" if (@fams && @candProjProts);
die "Error: either -f or -pt must be given" unless (@fams || @candProjProts); 219 219 die "Error: either -f or -pt must be given" unless (@fams || @candProjProts);
220 220
if (@candProjProts) { 221 221 if (@candProjProts) {
prepare_seqs_for_projection(); 222 222 prepare_seqs_for_projection();
} 223 223 }
224 224
} 225 225 }
226 226
227 227
#========================================================================== 228 228 #==========================================================================
#Setup the environment for projection of domains onto sequences that are 229 229 #Setup the environment for projection of domains onto sequences that are
#not present in TCDB. 230 230 #not present in TCDB.
231 231
sub prepare_seqs_for_projection { 232 232 sub prepare_seqs_for_projection {
233 233
my $tcdbDir = "$rootDir/tcdb"; 234 234 my $tcdbDir = "$rootDir/tcdb";
system "mkdir -p $tcdbDir" unless (-d $tcdbDir); 235 235 system "mkdir -p $tcdbDir" unless (-d $tcdbDir);
236 236
237 #to prevent modifying the original files, here I'll save the input
238 #sequences with the artificial TCIDs
239 my $origInfilesDir = "$rootDir/inputFiles";
240 system "mkdir -p $origInfilesDir" unless (-d $origInfilesDir);
241
242
#generate an empty "TCDB sequence file" that will contains proteins not in TCDB 237 243 #generate an empty "TCDB sequence file" that will contains proteins not in TCDB
my $new_tcdbSeqsFile = "$tcdbDir/tcdb.faa"; 238 244 my $new_tcdbSeqsFile = "$tcdbDir/tcdb.faa";
system "cat /dev/null > $new_tcdbSeqsFile"; 239 245 system "cat /dev/null > $new_tcdbSeqsFile";
240 246
241 247
#---------------------------------------------------------------------- 242 248 #----------------------------------------------------------------------
#generate the new TCDB database relevant for the projection 243 249 #generate the new TCDB database relevant for the projection
244 250
foreach my $pair (@candProjProts) { 245 251 foreach my $pair (@candProjProts) {
246 252
my $tcid = $pair->[0]; 247 253 my $tcid = $pair->[0];
my $tgtF = $pair->[1]; 248 254 my $tgtF = $pair->[1];
249 255
256 my @comp = split(/\//, $tgtF);
257 my $tgtFileName = $comp[-1];
250 258
#Add family to the main array (as if provided by the -f commandline option) 251 259 #Add family to the main array (as if provided by the -f commandline option)
push (@fams, $tcid); 252 260 push (@fams, $tcid);
253 261
254 262
#extracts the tcids of the systems under reference family 255 263 #extracts the tcids of the systems under reference family
# my $tcdbSeqs = $tcdbSeqsFile; #"$ENV{HOME}/db/blastdb/tcdb.faa"; 256 264 # my $tcdbSeqs = $tcdbSeqsFile; #"$ENV{HOME}/db/blastdb/tcdb.faa";
# die "TCDB sequences not found: $tcdbSeqs" unless (-f $tcdbSeqs); 257 265 # die "TCDB sequences not found: $tcdbSeqs" unless (-f $tcdbSeqs);
258 266
my $sysHash = TCDB::Assorted::getSystemAccessions($tcdbSeqsFile, 'both', 'system', [$tcid], 0); 259 267 my $sysHash = TCDB::Assorted::getSystemAccessions($tcdbSeqsFile, 'both', 'system', [$tcid], 0);
260 268
# print Data::Dumper->Dump([$sysHash, $tcdbSeqs], [qw(*sysHash *tcdbSeqs)]); 261 269 # print Data::Dumper->Dump([$sysHash, $tcdbSeqs], [qw(*sysHash *tcdbSeqs)]);
# <STDIN>; 262 270 # <STDIN>;
263 271
#determine the TCID that will be used as reference for the target sequences 264 272 #determine the TCID that will be used as reference for the target sequences
my @systems = @{ $sysHash->{$tcid} }; 265 273 my @systems = @{ $sysHash->{$tcid} };
die "Could not find TCIDs for $tcid in $tcdbSeqsFile" unless (@systems); 266 274 die "Could not find TCIDs for $tcid in $tcdbSeqsFile" unless (@systems);
267 275
my $tgtTC = $systems[-1]->[0]; 268 276 my $tgtTC = $systems[-1]->[0];
$tgtTC =~ s/\.\d+$/\.10000/; 269 277 $tgtTC =~ s/\.\d+$/\.10000/;
270 278
271 279
#Replace the TCID in the file corresponding to the target proteins 272 280 #Replace the TCID in the file corresponding to the target proteins
my $cmd1 = qq(perl -i.orig -pe 's/\\>([a-zA-Z0-9_-]+).*/\\>${tgtTC}-\$1/;' $tgtF); 273 281 my $cmd1 = qq(perl -pe 's/\\>([a-zA-Z0-9_-]+).*/\\>${tgtTC}-\$1/;' $tgtF > $origInfilesDir/$tgtFileName);
system $cmd1 unless (-f "${tgtF}.orig"); 274 282 system $cmd1 unless (-f "$origInfilesDir/$tgtFileName");
275 283
276 284
#Extract sequences for reference family 277 285 #Extract sequences for reference family
my $outFile = "$tcdbDir/tcdb-${tcid}.faa"; 278 286 my $outFile = "$tcdbDir/tcdb-${tcid}.faa";
my $cmd2 = qq(extractTCDB.pl -i $tcid -o $tcdbDir -d $tcdbSeqsFile); 279 287 my $cmd2 = qq(extractTCDB.pl -i $tcid -o $tcdbDir -d $tcdbSeqsFile);
system $cmd2 unless (-f $outFile); 280 288 system $cmd2 unless (-f $outFile);
die "Could not generate sequence file: $outFile" unless (-f $outFile); 281 289 die "Could not generate sequence file: $outFile" unless (-f $outFile);
282 290
283 291
#Add family and target sequences to the new TCDB family 284 292 #Add family and target sequences to the new TCDB family
my $cmd3 = qq(cat $outFile $tgtF >> $new_tcdbSeqsFile); 285 293 my $cmd3 = qq(cat $outFile $origInfilesDir/$tgtFileName >> $new_tcdbSeqsFile);
system $cmd3; 286 294 system $cmd3;
} 287 295 }
288 296
#---------------------------------------------------------------------- 289 297 #----------------------------------------------------------------------
#Generate the PFam database 290 298 #Generate the PFam database
291 299
my $pfamD = "$rootDir/pfam"; 292 300 my $pfamD = "$rootDir/pfam";
system "mkdir -p $pfamD" unless (-d $pfamD); 293 301 system "mkdir -p $pfamD" unless (-d $pfamD);
294 302
my $pfamTMPfile = "$pfamD/tcdb_pfam.out"; 295 303 my $pfamTMPfile = "$pfamD/tcdb_pfam.out";
$pfamFile = "${pfamTMPfile}.bz2"; 296 304 $pfamFile = "${pfamTMPfile}.bz2";
297 305
#run Pfam 298 306 #run Pfam
my $cmd4 = qq (hmmscan --cpu 4 --noali --cut_ga -o /dev/null --domtblout $pfamTMPfile $ENV{RESEARCH_DATA}/pfam/pfamdb/Pfam-A.hmm $new_tcdbSeqsFile); 299 307 my $cmd4 = qq (hmmscan --cpu 4 --noali --cut_ga -o /dev/null --domtblout $pfamTMPfile $ENV{RESEARCH_DATA}/pfam/pfamdb/Pfam-A.hmm $new_tcdbSeqsFile);
system $cmd4 unless (-f $pfamTMPfile || -f $pfamFile); 300 308 system $cmd4 unless (-f $pfamTMPfile || -f $pfamFile);
301 309
#compress pfam file 302 310 #compress pfam file
my $cmd5 = qq(bzip2 $pfamTMPfile); 303 311 my $cmd5 = qq(bzip2 $pfamTMPfile);
system $cmd5 unless (-f $pfamFile); 304 312 system $cmd5 unless (-f $pfamFile);
305 313
306 314
#---------------------------------------------------------------------- 307 315 #----------------------------------------------------------------------
#now generate the blast DB 308 316 #now generate the blast DB
309 317
my $blastD = "$rootDir/blastdb"; 310 318 my $blastD = "$rootDir/blastdb";
system "mkdir -p $blastD" unless (-d $blastD); 311 319 system "mkdir -p $blastD" unless (-d $blastD);
312 320
$blastdb = "$blastD/tcdb"; 313 321 $blastdb = "$blastD/tcdb";
314 322
my $cmd6 = qq(extractTCDB.pl -i tcdb -o $blastD -f blast -d $new_tcdbSeqsFile); 315 323 my $cmd6 = qq(extractTCDB.pl -i tcdb -o $blastD -f blast -d $new_tcdbSeqsFile);
system $cmd6 unless (-f "${blastdb}.pin"); 316 324 system $cmd6 unless (-f "${blastdb}.pin");
317 325
318 326
#For all purposes update the TCDB sequence file to point to the new "customized" file. 319 327 #For all purposes update the TCDB sequence file to point to the new "customized" file.
$tcdbSeqsFile = $new_tcdbSeqsFile; 320 328 $tcdbSeqsFile = $new_tcdbSeqsFile;
} 321 329 }
322 330
323 331
324 332
325 333
326 334
#========================================================================== 327 335 #==========================================================================
#Read the -pt option. It is expected that the user provides the family to which 328 336 #Read the -pt option. It is expected that the user provides the family to which
#the target proteins are expected to belong. Example format should is: 329 337 #the target proteins are expected to belong. Example format should is:
# -pt {tcid_1},{file with target sequences 1}:{tcid_2},{file with target sequences 2}. 330 338 # -pt {tcid_1},{file with target sequences 1}:{tcid_2},{file with target sequences 2}.
# 331 339 #
#NOTE: This option is incompatible with -f 332 340 #NOTE: This option is incompatible with -f
333 341
sub read_proj_targets { 334 342 sub read_proj_targets {
335 343
my ($opt, $value) = @_; 336 344 my ($opt, $value) = @_;
337 345
my @pairs = split (/:/, $value); 338 346 my @pairs = split (/:/, $value);
die "No significant argument passed to option -pt" unless (@pairs); 339 347 die "No significant argument passed to option -pt" unless (@pairs);