Package Bio :: Package Sequencing :: Package Applications :: Module _Novoalign
[hide private]
[frames | no frames]

Source Code for Module Bio.Sequencing.Applications._Novoalign

  1  # Copyright 2009 by Osvaldo Zagordi.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the short read aligner Novoalign by Novocraft (www.novocraft.com) 
  6   
  7  Last checked against version: 2.05.04 
  8  """ 
  9  import types 
 10  from Bio.Application import _Option, AbstractCommandline 
 11   
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for the short read alignment program novoalign by Novocraft.

    Each novoalign switch is registered as an ``_Option`` in
    ``self.parameters``; the second name in every option's name list
    (e.g. ``database``, ``readfile``, ``read_method``) is the keyword /
    attribute name used from Python.
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Register the supported novoalign options and initialise the wrapper.

        Arguments:
         - cmd - name or path of the novoalign executable.
         - kwargs - initial option values, forwarded unchanged to
           ``AbstractCommandline.__init__``.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # The builtins are used instead of types.IntType / types.StringType:
        # on Python 2 those names are plain aliases of int / str, and they
        # do not exist at all on Python 3.
        def _is_int(value):
            """Return True if value is an integer."""
            return isinstance(value, int)

        def _is_str(value):
            """Return True if value is a string."""
            return isinstance(value, str)

        def _is_repeat_method(value):
            """Return True if the first word of value is a known repeat method.

            Only the first word is checked because 'All' and 'Exhaustive'
            may be followed by a limit.  An empty or whitespace-only string
            is rejected rather than raising IndexError.
            """
            fields = value.split()
            return bool(fields) and fields[0] in REPEAT_METHOD

        # NOTE(review): _Option is called positionally; the arguments are
        # presumably (names, types, checker_function, is_required,
        # description, equate) - confirm against Bio.Application._Option.
        self.parameters = [
            _Option(["-d", "database"], ["input", "file"],
                    None, 0, "database filename",
                    0),
            _Option(["-f", "readfile"], ["input", "file"],
                    None, 0, "read file",
                    0),
            _Option(["-F", "format"], ["input", "option"],
                    lambda x: x in READ_FORMAT,
                    0, "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    0),

            # Alignment scoring options
            _Option(["-t", "threshold"], ["input"],
                    _is_int,
                    0, "Threshold for alignment score",
                    0),
            _Option(["-g", "gap_open"], ["input"],
                    _is_int,
                    0, "Gap opening penalty [default: 40]",
                    0),
            _Option(["-x", "gap_extend"], ["input"],
                    _is_int,
                    0, "Gap extend penalty [default: 15]",
                    0),
            _Option(["-u", "unconverted"], ["input"],
                    _is_int, 0,
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    0),

            # Quality control and read filtering
            _Option(["-l", "good_bases"], ["input"],
                    _is_int,
                    0, "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    0),
            _Option(["-h", "homopolymer"], ["input"],
                    _is_int,
                    0, "Homopolymer read filter [default: 20; disable: negative value]",
                    0),

            # Read preprocessing options
            _Option(["-a", "adapter3"], ["input"],
                    _is_str,
                    0, "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    0),
            _Option(["-n", "truncate"], ["input"],
                    _is_int,
                    0, "Truncate to specific length before alignment",
                    0),
            _Option(["-s", "trimming"], ["input"],
                    _is_int,
                    0, "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    0),
            _Option(["-5", "adapter5"], ["input"],
                    _is_str,
                    0, "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adapter3), but on the 5' end.",
                    0),

            # Reporting options
            _Option(["-o", "report"], ["input"],
                    lambda x: x in REPORT_FORMAT,
                    0, "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    0),
            _Option(["-Q", "quality"], ["input"],
                    _is_int,
                    0, "Lower threshold for an alignment to be reported [default: 0]",
                    0),
            _Option(["-R", "repeats"], ["input"],
                    _is_int,
                    0, "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    0),
            _Option(["-r", "read_method"], ["input"],
                    _is_repeat_method,
                    0, "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits."
                    % ", ".join(REPEAT_METHOD),
                    0),
            _Option(["-e", "recorded"], ["input"],
                    _is_int,
                    0, "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    0),
            _Option(["-q", "qual_digits"], ["input"],
                    _is_int,
                    0, "Decimal digits for quality scores [default: 0]",
                    0),

            # Paired end options
            _Option(["-i", "fragment"], ["input"],
                    # Two whitespace-separated fields: length and std. dev.
                    lambda x: len(x.split()) == 2,
                    0, "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    0),
            _Option(["-v", "variation"], ["input"],
                    _is_int,
                    0, "Structural variation penalty [default: 70]",
                    0),

            # miRNA mode
            _Option(["-m", "miRNA"], ["input"],
                    _is_int,
                    0, "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    0),

            # Multithreading
            _Option(["-c", "cores"], ["input"],
                    _is_int,
                    0, "Number of threads, disabled on free versions [default: number of cores]",
                    0),

            # Quality calibrations
            _Option(["-k", "read_cal"], ["input"],
                    _is_str,
                    0, "Read quality calibration from file (mismatch counts)",
                    0),
            _Option(["-K", "write_cal"], ["input"],
                    _is_str,
                    0, "Accumulate mismatch counts and write to file",
                    0),
        ]
        # The option table must exist before the base class processes kwargs.
        AbstractCommandline.__init__(self, cmd, **kwargs)


if __name__ == '__main__':
    # Small demonstration: build a novoalign command line, adjust a few
    # options through attributes, and print the resulting command string.
    cml = NovoalignCommandline(database='~/some_dir/some_db',
                               readfile='~/some_dir/some_seq.txt')
    cml.format = 'PRBnSEQ'
    # The -r option is registered under the name "read_method"; assigning
    # to any other attribute name would not set the switch.
    cml.read_method = '0.99'
    cml.fragment = '250 20'  # must be given as a string
    cml.miRNA = 100
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(cml)
    # To actually run the command: subprocess.call(str(cml), shell=True)