1
2
3
4
5
6
7 """Definitions for interacting with BLAST related applications.
8
9 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
10
11 - FastacmdCommandline
12 - BlastallCommandline
13 - BlastpgpCommandline
14 - RpsBlastCommandline
15
16 Wrappers for the new NCBI BLAST+ tools (written in C++):
17
18 - NcbiblastpCommandline - Protein-Protein BLAST
19 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
20 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
21 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
- NcbitblastxCommandline - Translated Query-Translated Subject BLAST
- NcbipsiblastCommandline - Position-Specific Iterated BLAST
24 - NcbirpsblastCommandline - Reverse Position Specific BLAST
25 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
26
27 """
28 from Bio.Application import _Option, AbstractCommandline, _Switch
29
class FastacmdCommandline(AbstractCommandline):
    """Build a command line for the NCBI fastacmd program (OBSOLETE).

    Two options are declared: the database to retrieve from (-d, database)
    and the identifier to look up (-s, search_string).
    """

    def __init__(self, cmd="fastacmd", **kwargs):
        """Declare the fastacmd options, then defer to AbstractCommandline.

        cmd is the executable name; any keyword arguments are forwarded
        unchanged to AbstractCommandline.__init__.
        """
        database = _Option(["-d", "database"], ["input"], None, 1,
                           "The database to retrieve from.")
        search_string = _Option(["-s", "search_string"], ["input"], None, 1,
                                "The id to search for.")
        self.parameters = [database, search_string]
        AbstractCommandline.__init__(self, cmd, **kwargs)
43
44
class _BlastCommandLine(AbstractCommandline):
    """Shared base class for the classic NCBI BLAST wrappers (PRIVATE).

    Meant for subclassing only: it declares the options which blastall,
    blastpgp and rpsblast have in common.
    """

    def __init__(self, cmd=None, **kwargs):
        """Put the shared options in front of any declared by a subclass.

        cmd is the executable name and must not be None; the keyword
        arguments are forwarded to AbstractCommandline.__init__.
        """
        assert cmd is not None
        shared_options = [
            _Switch(["--help", "help"], ["input"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Option(["-d", "database"], ["input"], None, 1,
                    "The database to BLAST against.", False),
            _Option(["-i", "infile"], ["input", "file"], None, 1,
                    "The sequence to search with.", False),
            _Option(["-e", "expectation"], ["input"], None, 0,
                    "Expectation value cutoff.", False),
            _Option(["-m", "align_view"], ["input"], None, 0,
                    "Alignment view. Integer 0-11. Use 7 for XML output.",
                    False),
            _Option(["-o", "align_outfile", "outfile"], ["output", "file"],
                    None, 0,
                    "Output file for alignment.", False),
            _Option(["-y", "xdrop_extension"], ["input"], None, 0,
                    "Dropoff for blast extensions.", False),
            _Option(["-F", "filter"], ["input"], None, 0,
                    "Filter query sequence with SEG? T/F", False),
            # Unlike its neighbours this option is declared without the
            # trailing False argument; kept exactly as found.
            _Option(["-X", "xdrop"], ["input"], None, 0,
                    "Dropoff value (bits) for gapped alignments."),
            _Option(["-I", "show_gi"], ["input"], None, 0,
                    "Show GI's in deflines? T/F", False),
            _Option(["-J", "believe_query"], ["input"], None, 0,
                    "Believe the query defline? T/F", False),
            _Option(["-Z", "xdrop_final"], ["input"], None, 0,
                    "X dropoff for final gapped alignment.", False),
            _Option(["-z", "db_length"], ["input"], None, 0,
                    "Effective database length.", False),
            _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
                    "seqalign file to output.", False),
            _Option(["-v", "descriptions"], ["input"], None, 0,
                    "Number of one-line descriptions.", False),
            _Option(["-b", "alignments"], ["input"], None, 0,
                    "Number of alignments.", False),
            _Option(["-Y", "search_length"], ["input"], None, 0,
                    "Effective length of search space (use zero for the "
                    "real size).", False),
            _Option(["-T", "html"], ["input"], None, 0,
                    "Produce HTML output? T/F", False),
            _Option(["-U", "case_filter"], ["input"], None, 0,
                    "Use lower case filtering of FASTA sequence? T/F", False),
            _Option(["-a", "nprocessors"], ["input"], None, 0,
                    "Number of processors to use.", False),
            _Option(["-g", "gapped"], ["input"], None, 0,
                    "Whether to do a gapped alignment. T/F", False),
        ]
        # A subclass may already have stored its own options in
        # self.parameters; the shared ones are placed in front of them.
        # Without such an attribute the shared options are the whole list.
        try:
            subclass_options = self.parameters
        except AttributeError:
            self.parameters = shared_options
        else:
            self.parameters = shared_options + subclass_options
        AbstractCommandline.__init__(self, cmd, **kwargs)
108
114
115
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared base class for the blastall and blastpgp wrappers (PRIVATE).

    Meant for subclassing only: it declares the options common to blastall
    and blastpgp which rpsblast does not take.
    """

    def __init__(self, cmd=None, **kwargs):
        """Put the blastall/blastpgp options in front of the subclass ones.

        cmd is the executable name and must not be None; the keyword
        arguments are passed on to _BlastCommandLine.__init__.
        """
        assert cmd is not None
        shared_options = [
            _Option(["-G", "gap_open"], ["input"], None, 0,
                    "Gap open penalty", False),
            _Option(["-E", "gap_extend"], ["input"], None, 0,
                    "Gap extension penalty", False),
            _Option(["-A", "window_size"], ["input"], None, 0,
                    "Multiple hits window size", False),
            _Option(["-f", "hit_extend"], ["input"], None, 0,
                    "Threshold for extending hits.", False),
            _Option(["-K", "keep_hits"], ["input"], None, 0,
                    " Number of best hits from a region to keep.", False),
            _Option(["-W", "wordsize"], ["input"], None, 0,
                    "Word size", False),
            _Option(["-P", "passes"], ["input"], None, 0,
                    "Hits/passes. Integer 0-2. 0 for multiple hit, "
                    "1 for single hit (does not apply to blastn)", False),
        ]
        # Options a subclass stored beforehand stay at the end of the list.
        try:
            subclass_options = self.parameters
        except AttributeError:
            self.parameters = shared_options
        else:
            self.parameters = shared_options + subclass_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
149
150
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now obsolete, and will be deprecated and
    removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastall", **kwargs):
        """Declare the blastall-only options, then chain to the base class.

        The options shared with blastpgp and rpsblast are added by the
        parent constructors.
        """
        blastall_options = [
            _Option(["-p", "program"], ["input"], None, 1,
                    "The blast program to use (e.g. blastp, blastn).", False),
            _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                    "Penalty for a nucleotide mismatch (blastn only).", False),
            _Option(["-r", "nuc_match"], ["input"], None, 0,
                    "Reward for a nucleotide match (blastn only).", False),
            _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                    "Query Genetic code to use.", False),
            _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                    "DB Genetic code (for tblast[nx] only).", False),
            _Option(["-M", "matrix"], ["input"], None, 0,
                    "Matrix to use", False),
            _Option(["-S", "strands"], ["input"], None, 0,
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.", False),
            _Option(["-l", "restrict_gi"], ["input"], None, 0,
                    "Restrict search of database to list of GI's.", False),
            _Option(["-R", "checkpoint"], ["input", "file"], None, 0,
                    "PSI-TBLASTN checkpoint input file.", False),
            _Option(["-n", "megablast"], ["input"], None, 0,
                    "MegaBlast search T/F.", False),
            _Option(["-L", "region_length", "range_restriction"], ["input"],
                    None, 0,
                    "Location on query sequence (string format start,end).\n"
                    "\n"
                    "In older versions of BLAST, -L set the length of region\n"
                    "used to judge hits (see -K parameter).", False),
            _Option(["-w", "frame_shit_penalty"], ["input"], None, 0,
                    "Frame shift penalty (OOF algorithm for blastx).", False),
            _Option(["-t", "largest_intron"], ["input"], None, 0,
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)", False),
            _Option(["-B", "num_concatenated_queries"], ["input"], None, 0,
                    "Number of concatenated queries, for blastn and tblastn.",
                    False),
            _Option(["-V", "oldengine"], ["input"], None, 0,
                    "Force use of the legacy BLAST engine.", False),
            _Option(["-C", "composition_based"], ["input"], None, 0,
                    "Use composition-based statistics for tblastn:\n"
                    "D or d: default (equivalent to F)\n"
                    "0 or F or f: no composition-based statistics\n"
                    "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                    "2: Composition-based score adjustment as in Bioinformatics\n"
                    "21:902-911, 2005, conditioned on sequence properties\n"
                    "3: Composition-based score adjustment as in Bioinformatics\n"
                    "21:902-911, 2005, unconditionally\n"
                    "For programs other than tblastn, must either be absent or be\n"
                    "D, F or 0.", False),
            _Option(["-s", "smith_waterman"], ["input"], None, 0,
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F", False),
        ]
        self.parameters = blastall_options
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
235
236
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastpgp program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastpgp with a renamed tool psiblast. This module provides
    NcbipsiblastCommandline as a wrapper for the new tool psiblast.

    Like blastpgp (and blastall), this wrapper is now obsolete, and will be
    deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastpgp", **kwargs):
        """Declare the blastpgp-only options, then chain to the base class.

        The options shared with blastall and rpsblast are added by the
        parent constructors.
        """
        blastpgp_options = [
            _Option(["-C", "checkpoint_outfile"], ["output", "file"], None, 0,
                    "Output file for PSI-BLAST checkpointing.", False),
            _Option(["-R", "restart_infile"], ["input", "file"], None, 0,
                    "Input file for PSI-BLAST restart.", False),
            _Option(["-k", "hit_infile"], ["input", "file"], None, 0,
                    "Hit file for PHI-BLAST.", False),
            _Option(["-Q", "matrix_outfile"], ["output", "file"], None, 0,
                    "Output file for PSI-BLAST matrix in ASCII.", False),
            _Option(["-B", "align_infile"], ["input", "file"], None, 0,
                    "Input alignment file for PSI-BLAST restart.", False),
            _Option(["-S", "required_start"], ["input"], None, 0,
                    "Start of required region in query.", False),
            _Option(["-H", "required_end"], ["input"], None, 0,
                    "End of required region in query.", False),
            _Option(["-j", "npasses"], ["input"], None, 0,
                    "Number of passes", False),
            _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                    "Number of bits to trigger gapping.", False),
            _Option(["-c", "pseudocounts"], ["input"], None, 0,
                    "Pseudocounts constants for multiple passes.", False),
            _Option(["-h", "model_threshold"], ["input"], None, 0,
                    "E-value threshold to include in multipass model.", False),
            _Option(["-L", "region_length"], ["input"], None, 0,
                    "Cost to decline alignment (disabled when zero).", False),
            _Option(["-M", "matrix"], ["input"], None, 0,
                    "Matrix (string, default BLOSUM62).", False),
            _Option(["-p", "program"], ["input"], None, 1,
                    "The blast program to use (e.g blastpgp, patseedp or seedp).",
                    False),
        ]
        self.parameters = blastpgp_options
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
290
291
class RpsBlastCommandline(_BlastCommandLine):
    """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing the old rpsblast with a new version of the same name plus a
    second tool rpstblastn, both taking different command line arguments. This
    module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
    wrappers for the new tools.

    Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
    be deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-only options, then chain to the base class.

        The options shared with blastall and blastpgp are added by
        _BlastCommandLine.__init__.
        """
        rpsblast_options = [
            _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                    "Number of bits to trigger gapping.", False),
            _Option(["-P", "multihit"], ["input"], None, 0,
                    "0 for multiple hit, 1 for single hit", False),
            _Option(["-l", "logfile"], ["output", "file"], None, 0,
                    "Logfile name.", False),
            _Option(["-p", "protein"], ["input"], None, 0,
                    "Query sequence is protein. T/F", False),
            _Option(["-L", "range_restriction"], ["input"], None, 0,
                    "Location on query sequence (string format start,end).",
                    False),
        ]
        self.parameters = rpsblast_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
332
333
class _NcbiblastCommandline(AbstractCommandline):
    """Base Commandline object for the NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST+ tools (blastn, rpsblast, rpstblastn, etc).
    """

    def __init__(self, cmd=None, **kwargs):
        """Put the shared BLAST+ options in front of any subclass options.

        cmd is the executable name and must not be None; the keyword
        arguments are forwarded to AbstractCommandline.__init__.
        """
        assert cmd is not None
        shared_options = [
            # Core switches
            _Switch(["-h", "h"], ["input"],
                    "Print USAGE and DESCRIPTION; ignore other arguments."),
            _Switch(["-help", "help"], ["input"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"], ["input"],
                    "Print version number; ignore other arguments."),
            # Query
            _Option(["-query", "query"], ["input", "file"], None, 0,
                    "The sequence to search with.", False),
            _Option(["-query_loc", "query_loc"], ["input"], None, 0,
                    "Location on the query sequence (Format: start-stop)",
                    False),
            # General search settings
            _Option(["-db", "db"], ["input"], None, 0,
                    "The database to BLAST against.", False),
            _Option(["-out", "out"], ["output", "file"], None, 0,
                    "Output file for alignment.", False),
            _Option(["-evalue", "evalue"], ["input"], None, 0,
                    "Expectation value cutoff.", False),
            _Option(["-word_size", "word_size"], ["input"], None, 0,
                    "Word size for wordfinder algorithm.\n"
                    "\n"
                    "Integer. Minimum 2.", False),
            # Output formatting
            _Option(["-outfmt", "outfmt"], ["input"], None, 0,
                    "Alignment view. Integer 0-10. Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    False),
            _Switch(["-show_gis", "show_gis"], ["input"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions", "num_descriptions"], ["input"],
                    None, 0,
                    "Number of database sequences to show one-line descriptions for.\n"
                    "\n"
                    "Integer argument (at least zero). Default is 500.\n"
                    "See also num_alignments.", False),
            # The help text used to say "show num_alignments for" and
            # pointed back at itself; it now cross-references its sibling.
            _Option(["-num_alignments", "num_alignments"], ["input"],
                    None, 0,
                    "Number of database sequences to show alignments for.\n"
                    "\n"
                    "Integer argument (at least zero). Default is 200.\n"
                    "See also num_descriptions.", False),
            _Switch(["-html", "html"], ["input"],
                    "Produce HTML output? See also the outfmt option."),
            # Query filtering
            _Switch(["-lcase_masking", "lcase_masking"], ["input"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            # Restricting the search or the results
            _Option(["-gilist", "gilist"], ["input", "file"], None, 0,
                    "Restrict search of database to list of GI's.\n"
                    "\n"
                    "Incompatible with: negative_gilist, remote, subject, subject_loc",
                    False),
            _Option(["-negative_gilist", "negative_gilist"],
                    ["input", "file"], None, 0,
                    "Restrict search of database to everything except the listed GIs.\n"
                    "\n"
                    "Incompatible with: gilist, remote, subject, subject_loc",
                    False),
            _Option(["-entrez_query", "entrez_query"], ["input"], None, 0,
                    "Restrict search with the given Entrez query (requires remote).",
                    False),
            _Option(["-max_target_seqs", "max_target_seqs"], ["input"],
                    None, 0,
                    "Maximum number of aligned sequences to keep.\n"
                    "\n"
                    "Integer argument (at least one).", False),
            # Statistics
            _Option(["-dbsize", "dbsize"], ["input"], None, 0,
                    "Effective length of the database (integer)", False),
            _Option(["-searchsp", "searchsp"], ["input"], None, 0,
                    "Effective length of the search space (integer)", False),
            # Extension
            _Option(["-xdrop_ungap", "xdrop_ungap"], ["input"], None, 0,
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    False),
            _Option(["-xdrop_gap", "xdrop_gap"], ["input"], None, 0,
                    "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                    False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"], ["input"],
                    None, 0,
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    False),
            _Option(["-window_size", "window_size"], ["input"], None, 0,
                    "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                    False),
            # Search strategy
            _Option(["-import_search_strategy", "import_search_strategy"],
                    ["input", "file"], None, 0,
                    "Search strategy to use.\n"
                    "\n"
                    "Incompatible with: export_search_strategy", False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    ["output", "file"], None, 0,
                    "File name to record the search strategy used.\n"
                    "\n"
                    "Incompatible with: import_search_strategy", False),
            # Miscellaneous
            _Switch(["-parse_deflines", "parse_deflines"], ["input"],
                    "Should the query and subject defline(s) be parsed?"),
            _Option(["-num_threads", "num_threads"], ["input"], None, 0,
                    "Number of threads to use in the BLAST search.\n"
                    "\n"
                    "Integer of at least one. Default is one.\n"
                    "Incompatible with: remote", False),
            _Switch(["-remote", "remote"], ["input"],
                    "Execute search remotely?\n"
                    "\n"
                    "Incompatible with: gilist, negative_gilist, subject_loc, "
                    "num_threads, ..."),
        ]
        # Options a subclass stored beforehand stay at the end of the list.
        try:
            subclass_options = self.parameters
        except AttributeError:
            self.parameters = shared_options
        else:
            self.parameters = shared_options + subclass_options
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject clashing option combinations, then run the base checks.

        Raises ValueError when two mutually exclusive options are both set,
        or when entrez_query is given without remote.
        """
        incompatibles = {
            "remote": ["gilist", "negative_gilist", "num_threads"],
            "import_search_strategy": ["export_search_strategy"],
            "gilist": ["negative_gilist"],
        }
        self._validate_incompatibilities(incompatibles)
        if self.entrez_query and not self.remote:
            raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)

    def _validate_incompatibilities(self, incompatibles):
        """Raise ValueError if any listed pair of options are both set.

        incompatibles maps an option name to a list of option names it
        cannot be combined with; values are read via self._get_parameter.
        """
        for option, clashes in incompatibles.items():
            if not self._get_parameter(option):
                continue
            for other in clashes:
                if self._get_parameter(other):
                    raise ValueError("Options %s and %s are incompatible."
                                     % (option, other))
476
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Base Commandline object for two-sequence BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST+ tools supporting two-sequence BLAST
    (blastn, psiblast, etc) but not rpsblast or rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        """Put the two-sequence options in front of any subclass options.

        cmd is the executable name and must not be None; the keyword
        arguments are passed on to _NcbiblastCommandline.__init__.
        """
        assert cmd is not None
        shared_options = [
            # General search settings
            _Option(["-gapopen", "gapopen"], ["input"], None, 0,
                    "Cost to open a gap (integer).", False),
            _Option(["-gapextend", "gapextend"], ["input"], None, 0,
                    "Cost to extend a gap (integer).", False),
            # BLAST-2-Sequences
            _Option(["-subject", "subject"], ["input", "file"], None, 0,
                    "Subject sequence(s) to search.\n"
                    "\n"
                    "Incompatible with: db, gilist, negative_gilist.\n"
                    "See also subject_loc.", False),
            _Option(["-subject_loc", "subject_loc"], ["input"], None, 0,
                    "Location on the subject sequence (Format: start-stop)\n"
                    "\n"
                    "Incompatible with: db, gilist, negative_gilist, remote.\n"
                    "See also subject.", False),
            # Restricting the results
            _Option(["-culling_limit", "culling_limit"], ["input"], None, 0,
                    "Hit culling limit (integer).\n"
                    "\n"
                    "If the query range of a hit is enveloped by that of at least this many\n"
                    "higher-scoring hits, delete the hit.\n"
                    "\n"
                    "Incompatible with: best_hit_overhang, best_hit_score_edge.",
                    False),
            _Option(["-best_hit_overhang", "best_hit_overhang"], ["input"],
                    None, 0,
                    "Best Hit algorithm overhang value (recommended value: 0.1)\n"
                    "\n"
                    "Float between 0.0 and 0.5 inclusive.\n"
                    "\n"
                    "Incompatible with: culling_limit.", False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    ["input"], None, 0,
                    "Best Hit algorithm score edge value (recommended value: 0.1)\n"
                    "\n"
                    "Float between 0.0 and 0.5 inclusive.\n"
                    "\n"
                    "Incompatible with: culling_limit.", False),
        ]
        # Options a subclass stored beforehand stay at the end of the list.
        try:
            subclass_options = self.parameters
        except AttributeError:
            self.parameters = shared_options
        else:
            self.parameters = shared_options + subclass_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject clashing two-sequence options, then run the base checks.

        Raises ValueError (from _validate_incompatibilities) when two
        mutually exclusive options are both set.
        """
        incompatibles = {
            # Each entry must be its own option name.  This used to be the
            # single string "db, gilist, negative_gilist, remote", which
            # names no option at all.
            "subject_loc": ["db", "gilist", "negative_gilist", "remote"],
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
            "subject": ["db", "gilist", "negative_gilist"],
        }
        self._validate_incompatibilities(incompatibles)
        _NcbiblastCommandline._validate(self)
539
class NcbiblastpCommandline(_Ncbiblast2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastp", **kwargs):
        """Declare the blastp-only options, then chain to the base class.

        The options shared with the other BLAST+ tools are added by the
        parent constructors.
        """
        blastp_options = [
            # General search settings
            _Option(["-task", "task"], ["input"],
                    lambda value: value in ["blastp", "blastp-short"], 0,
                    "Task to execute (string, blastp (default) or blastp-short).",
                    False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)", False),
            # Whole-value membership test.  The previous substring test
            # against "0Ft2TtDd" rejected the documented "f" and accepted
            # the empty string and fragments such as "Ft".  str() lets the
            # integers 0 and 2 through as well.
            _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                    lambda value: str(value) in ("0", "F", "f", "2",
                                                 "T", "t", "D", "d"), 0,
                    "Use composition-based statistics (string, default 2, i.e. True).\n"
                    "\n"
                    "0, F or f: no composition-based statistics\n"
                    "2, T or t, D or d : Composition-based score adjustment as in\n"
                    "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                    "\n"
                    "Note that tblastn also supports values of 1 and 3.",
                    False),
            # Query filtering
            _Option(["-seg", "seg"], ["input"], None, 0,
                    'Filter query sequence with SEG (string).\n'
                    '\n'
                    'Format: "yes", "window locut hicut", or "no" to disable.\n'
                    'Default is "12 2.2 2.5', False),
            # Restricting the search or the results
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    "Filtering algorithm for soft masking (integer).\n"
                    "\n"
                    "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                    "\n"
                    "Incompatible with: subject, subject_loc", False),
            # Extension
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
            # Miscellaneous
            _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        self.parameters = blastp_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
598
603
604
class NcbiblastnCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastn.

    For example, to run a search against the "nt" nucleotide database using the
    FASTA nucleotide file "m_cold.fasta" as the query, with an expectation value
    cut off of 0.001, saving the output to a file in XML format:

    >>> from Bio.Blast.Applications import NcbiblastnCommandline
    >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
    ...                               evalue=0.001, out="m_cold.xml", outfmt=5)
    >>> cline
    NcbiblastnCommandline(cmd='blastn', query='m_cold.fasta', db='nt', out='m_cold.xml', evalue=0.001, outfmt=5, strand='plus')
    >>> print cline
    blastn -query m_cold.fasta -db nt -out m_cold.xml -evalue 0.001 -outfmt 5 -strand plus

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastn", **kwargs):
        """Declare the blastn-only options, then chain to the base class.

        The options shared with the other BLAST+ tools are added by the
        parent constructors.
        """
        blastn_options = [
            # Input query options
            _Option(["-strand", "strand"], ["input"],
                    lambda value: value in ["both", "minus", "plus"], 0,
                    'Query strand(s) to search against database/subject.\n'
                    '\n'
                    'Values allowed are "both" (default), "minus", "plus".',
                    False),
            # General search settings
            _Option(["-task", "task"], ["input"],
                    lambda value: value in ['blastn', 'blastn-short',
                                            'dc-megablast', 'megablast',
                                            'vecscreen'], 0,
                    "Task to execute (string, default 'megablast')\n"
                    "\n"
                    "Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'\n"
                    "(the default), or 'vecscreen'.", False),
            _Option(["-penalty", "penalty"], ["input"], None, 0,
                    "Penalty for a nucleotide mismatch (integer, at most zero).",
                    False),
            _Option(["-reward", "reward"], ["input"], None, 0,
                    "Reward for a nucleotide match (integer, at least zero).",
                    False),
            _Option(["-index_name", "index_name"], ["input"], None, 0,
                    "MegaBLAST database index name.", False),
            # Query filtering
            _Option(["-dust", "dust"], ["input"], None, 0,
                    "Filter query sequence with DUST (string).\n"
                    "\n"
                    "Format: 'yes', 'level window linker', or 'no' to disable.\n"
                    "Default = '20 64 1'.\n", False),
            _Option(["-filtering_db", "filtering_db"], ["input"], None, 0,
                    "BLAST database containing filtering elements (i.e. repeats).",
                    False),
            _Option(["-window_masker_taxid", "window_masker_taxid"],
                    ["input"], None, 0,
                    "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                    False),
            _Option(["-window_masker_db", "window_masker_db"], ["input"],
                    None, 0,
                    "Enable WindowMasker filtering using this repeats database (string).",
                    False),
            # Restricting the search or the results
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    "Filtering algorithm for soft masking (integer).\n"
                    "\n"
                    "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                    "\n"
                    "Incompatible with: subject, subject_loc", False),
            _Option(["-perc_identity", "perc_identity"], ["input"], None, 0,
                    "Percent identity (real, 0 to 100 inclusive).", False),
            # Discontiguous MegaBLAST
            _Option(["-template_type", "template_type"], ["input"],
                    lambda value: value in ['coding', 'coding_and_optimal',
                                            'optimal'], 0,
                    "Discontiguous MegaBLAST template type (string).\n"
                    "\n"
                    "Allowed values: 'coding', 'coding_and_optimal' or 'optimal'\n"
                    "Requires: template_length.", False),
            _Option(["-template_length", "template_length"], ["input"],
                    lambda value: value in [16, 18, 21, '16', '18', '21'], 0,
                    "Discontiguous MegaBLAST template length (integer).\n"
                    "\n"
                    "Allowed values: 16, 18, 21\n"
                    "\n"
                    "Requires: template_type.", False),
            # Extension
            _Switch(["-no_greedy", "no_greedy"], ["input"],
                    "Use non-greedy dynamic programming extension"),
            _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                    ["input"], None, 0,
                    "Minimum raw gapped score to keep an alignment in the "
                    "preliminary gapped and traceback stages (integer).",
                    False),
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
        ]
        self.parameters = blastn_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject invalid blastn option combinations, then run base checks.

        Raises ValueError when db_soft_mask is combined with subject or
        subject_loc, or when only one of template_type and template_length
        is set.
        """
        incompatibles = {"db_soft_mask": ["subject", "subject_loc"]}
        self._validate_incompatibilities(incompatibles)
        # The two discontiguous MegaBLAST options only make sense together.
        if bool(self.template_type) != bool(self.template_length):
            raise ValueError("Options template_type and template_length "
                             "require each other.")
        _Ncbiblast2SeqCommandline._validate(self)
705
706
class NcbiblastxCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print cline
    blastx -query m_cold.fasta -db nr -evalue 0.001

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastx", **kwargs):
        """Declare the blastx-only options, then chain to the base class.

        The options shared with the other BLAST+ tools are added by the
        parent constructors.
        """
        blastx_options = [
            # Input query options
            _Option(["-strand", "strand"], ["input"],
                    lambda value: value in ["both", "minus", "plus"], 0,
                    'Query strand(s) to search against database/subject.\n'
                    '\n'
                    'Values allowed are "both" (default), "minus", "plus".',
                    False),
            _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                    "Genetic code to use to translate query\n"
                    "\n"
                    "Integer. Default is one.", False),
            # General search settings
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    ["input"], None, 0,
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    False),
            _Option(["-max_intron_length", "max_intron_length"], ["input"],
                    None, 0,
                    "Maximum intron length (integer).\n"
                    "\n"
                    "Length of the largest intron allowed in a translated nucleotide\n"
                    "sequence when linking multiple distinct alignments (a negative\n"
                    "value disables linking). Default zero.", False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)", False),
            # Query filtering
            _Option(["-seg", "seg"], ["input"], None, 0,
                    'Filter query sequence with SEG (string).\n'
                    '\n'
                    'Format: "yes", "window locut hicut", or "no" to disable.\n'
                    'Default is "12 2.2 2.5', False),
            # Restricting the search or the results
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    "Filtering algorithm for soft masking (integer).\n"
                    "\n"
                    "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                    "\n"
                    "Incompatible with: subject, subject_loc", False),
            # Extension
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
        ]
        self.parameters = blastx_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
768
773
774
class NcbitblastnCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print cline
    tblastn -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """

    def __init__(self, cmd="tblastn", **kwargs):
        """Declare the tblastn-only options, then chain to the base class.

        The options shared with the other BLAST+ tools are added by the
        parent constructors.
        """
        tblastn_options = [
            # General search settings
            # The help text used to be a copy of query_gencode's ("translate
            # query"); this option concerns the database/subject side.
            _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
                    "Genetic code to use to translate database/subjects\n"
                    "\n"
                    "Integer. Default is one.", False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    ["input"], None, 0,
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    False),
            _Option(["-max_intron_length", "max_intron_length"], ["input"],
                    None, 0,
                    "Maximum intron length (integer).\n"
                    "\n"
                    "Length of the largest intron allowed in a translated nucleotide\n"
                    "sequence when linking multiple distinct alignments (a negative\n"
                    "value disables linking). Default zero.", False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)", False),
            # Whole-value membership test.  The previous substring test
            # against "0Ft12TtDd3" rejected the documented "f" and accepted
            # the empty string and fragments such as "12".  str() lets the
            # integers 0-3 through as well.
            _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                    lambda value: str(value) in ("0", "F", "f", "1", "2",
                                                 "T", "t", "D", "d", "3"), 0,
                    "Use composition-based statistics (string, default 2, i.e. True).\n"
                    "\n"
                    "0, F or f: no composition-based statistics\n"
                    "1: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                    "2, T or t, D or d : Composition-based score adjustment as in\n"
                    "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                    "3: Composition-based score adjustment as in Bioinformatics 21:902-911,\n"
                    "2005, unconditionally\n"
                    "\n"
                    "Note that only tblastn supports values of 1 and 3.",
                    False),
            # Query filtering
            _Option(["-seg", "seg"], ["input"], None, 0,
                    'Filter query sequence with SEG (string).\n'
                    '\n'
                    'Format: "yes", "window locut hicut", or "no" to disable.\n'
                    'Default is "12 2.2 2.5', False),
            # Extension
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
            # Miscellaneous
            _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            # PSI-TBLASTN
            _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
                    "PSI-BLAST checkpoint file\n"
                    "\n"
                    "Incompatible with: remote, query", False),
        ]
        self.parameters = tblastn_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
842
847
848
850 """Wrapper for the NCBI BLAST+ program tblastx.
851
852 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
853 replaced the old blastall tool with separate tools for each of the searches.
854 This wrapper therefore replaces BlastallCommandline with option -p tblastx.
855
856 >>> from Bio.Blast.Applications import NcbitblastxCommandline
857 >>> cline = NcbitblastxCommandline(help=True)
858 >>> cline
859 NcbitblastxCommandline(cmd='tblastx', help=True)
860 >>> print cline
861 tblastx -help
862
863 You would typically run the command line with the Python subprocess module,
864 as described in the Biopython tutorial.
865 """
def __init__(self, cmd="tblastx", **kwargs):
    """Declare the tblastx specific options, then defer to the base class.

    Arguments:
     - cmd - name (or path) of the executable, default "tblastx".
     - kwargs - keyword arguments (e.g. help=True), handed on unchanged
       to _Ncbiblast2SeqCommandline.__init__ together with cmd.

    self.parameters is assigned before the base initialiser is called.
    """
    self.parameters = [
        _Option(["-strand", "strand"], ["input"],
                lambda value: value in ["both", "minus", "plus"], 0,
                'Query strand(s) to search against database/subject.\n'
                '\n'
                'Values allowed are "both" (default), "minus", "plus".',
                False),
        _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                "Genetic code to use to translate query\n"
                "\n"
                "Integer. Default is one.", False),
        # Distinct from query_gencode above: this one applies to the
        # database/subject sequences, and the help text must say so.
        _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
                "Genetic code to use to translate database/subjects\n"
                "\n"
                "Integer. Default is one.", False),
        _Option(["-max_intron_length", "max_intron_length"],
                ["input"], None, 0,
                "Maximum intron length (integer).\n"
                "\n"
                "Length of the largest intron allowed in a translated "
                "nucleotide\n"
                "sequence when linking multiple distinct alignments "
                "(a negative\n"
                "value disables linking). Default zero.", False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)", False),
        # Help text built from separate literals so the closing quote of
        # the default value is not eaten by a triple-quote terminator.
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to '
                'disable.\n'
                'Default is "12 2.2 2.5"', False),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
902
903
905 """Wrapper for the NCBI BLAST+ program psiblast.
906
907 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
908 replaced the old blastpgp tool with a similar tool psiblast. This wrapper
909 therefore replaces BlastpgpCommandline, the wrapper for blastpgp.
910
911 >>> from Bio.Blast.Applications import NcbipsiblastCommandline
912 >>> cline = NcbipsiblastCommandline(help=True)
913 >>> cline
914 NcbipsiblastCommandline(cmd='psiblast', help=True)
915 >>> print cline
916 psiblast -help
917
918 You would typically run the command line with the Python subprocess module,
919 as described in the Biopython tutorial.
920 """
def __init__(self, cmd="psiblast", **kwargs):
    """Declare the psiblast specific options, then defer to the base class.

    Arguments:
     - cmd - name (or path) of the executable, default "psiblast".
     - kwargs - keyword arguments (e.g. help=True), handed on unchanged
       to _Ncbiblast2SeqCommandline.__init__ together with cmd.

    self.parameters is assigned before the base initialiser is called.
    """
    # Whole-value membership test.  A substring test against the string
    # "0Ft2TtDd" would refuse the documented value "f" and would let
    # through the empty string and fragments such as "Ft" or "2T".
    comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
    self.parameters = [
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)", False),
        _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                lambda value: value in comp_based_stats_values, 0,
                "Use composition-based statistics "
                "(string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score "
                "adjustment as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on "
                "sequence properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.",
                False),
        # Help text built from separate literals so the closing quote of
        # the default value is not eaten by a triple-quote terminator.
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to '
                'disable.\n'
                'Default is "12 2.2 2.5"', False),
        _Option(["-gap_trigger", "gap_trigger"], ["input"], None, 0,
                "Number of bits to trigger gapping (float, default 22)",
                False),
        _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                "Compute locally optimal Smith-Waterman alignments?"),
        _Option(["-num_iterations", "num_iterations"], ["input"],
                None, 0,
                "Number of iterations to perform, integer\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote", False),
        _Option(["-out_pssm", "out_pssm"], ["output", "file"], None, 0,
                "File name to store checkpoint file", False),
        _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                ["output", "file"], None, 0,
                "File name to store ASCII version of PSSM", False),
        _Option(["-in_msa", "in_msa"], ["input", "file"], None, 0,
                "File name of multiple sequence alignment to restart "
                "PSI-BLAST\n"
                "\n"
                "Incompatible with: in_pssm, query", False),
        _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
                "PSI-BLAST checkpoint file\n"
                "\n"
                "Incompatible with: in_msa, query, phi_pattern", False),
        _Option(["-pseudocount", "pseudocount"], ["input"], None, 0,
                "Pseudo-count value used when constructing PSSM\n"
                "\n"
                "Integer. Default is zero.", False),
        _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                ["input"], None, 0,
                "E-value inclusion threshold for pairwise alignments\n"
                "\n"
                "Float. Default is 0.002.", False),
        _Option(["-phi_pattern", "phi_pattern"], ["input", "file"],
                None, 0,
                "File name containing pattern to search\n"
                "\n"
                "Incompatible with: in_pssm", False),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
983
990
991
993 """Wrapper for the NCBI BLAST+ program rpsblast.
994
995 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
996 replaced the old rpsblast tool with a similar tool of the same name. This
997 wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.
998
999 >>> from Bio.Blast.Applications import NcbirpsblastCommandline
1000 >>> cline = NcbirpsblastCommandline(help=True)
1001 >>> cline
1002 NcbirpsblastCommandline(cmd='rpsblast', help=True)
1003 >>> print cline
1004 rpsblast -help
1005
1006 You would typically run the command line with the Python subprocess module,
1007 as described in the Biopython tutorial.
1008 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the rpsblast specific option, then defer to the base class.

    Arguments:
     - cmd - name (or path) of the executable, default "rpsblast".
     - kwargs - keyword arguments (e.g. help=True), handed on unchanged
       to _NcbiblastCommandline.__init__ together with cmd.

    self.parameters is assigned before the base initialiser is called.
    """
    self.parameters = [
        # Help text built from separate literals so the closing quote of
        # the default value is not eaten by a triple-quote terminator.
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to '
                'disable.\n'
                'Default is "12 2.2 2.5"', False),
    ]
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1019
1020
1022 """Wrapper for the NCBI BLAST+ program rpstblastn.
1023
1024 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1025 replaced the old rpsblast tool with a similar tool of the same name, and a
1026 separate tool rpstblastn for Translated Reverse Position Specific BLAST.
1027
1028 >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
1029 >>> cline = NcbirpstblastnCommandline(help=True)
1030 >>> cline
1031 NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
1032 >>> print cline
1033 rpstblastn -help
1034
1035 You would typically run the command line with the Python subprocess module,
1036 as described in the Biopython tutorial.
1037 """
def __init__(self, cmd="rpstblastn", **kwargs):
    """Declare the rpstblastn specific options, then defer to the base class.

    Arguments:
     - cmd - name (or path) of the executable, default "rpstblastn".
     - kwargs - keyword arguments (e.g. help=True), handed on unchanged
       to _NcbiblastCommandline.__init__ together with cmd.

    self.parameters is assigned before the base initialiser is called.
    """
    self.parameters = [
        _Option(["-strand", "strand"], ["input"],
                lambda value: value in ["both", "minus", "plus"], 0,
                'Query strand(s) to search against database/subject.\n'
                '\n'
                'Values allowed are "both" (default), "minus", "plus".',
                False),
        _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                "Genetic code to use to translate query\n"
                "\n"
                "Integer. Default is one.", False),
        # Help text built from separate literals so the closing quote of
        # the default value is not eaten by a triple-quote terminator.
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to '
                'disable.\n'
                'Default is "12 2.2 2.5"', False),
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
    ]
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1062
1063
1065 """Run the Bio.Blast.Applications module's doctests."""
1066 import doctest
1067 doctest.testmod(verbose=1)
1068
# Script entry point: when this module is executed directly (rather than
# imported) run its self-test, _test().
if __name__ == "__main__":
    _test()
1072