Package Peach :: Package Generators :: Module unicode
[hide private]

Source Code for Module Peach.Generators.unicode

  1   
  2  ''' 
  3  Unicode generators 
  4   
  5  @author: Michael Eddington 
  6  @version: $Id: Peach.Generators.unicode-pysrc.html 1138 2008-08-16 19:39:03Z meddingt $ 
  7  ''' 
  8   
  9  # 
 10  # Copyright (c) 2006 Michael Eddington 
 11  # 
 12  # Permission is hereby granted, free of charge, to any person obtaining a copy  
 13  # of this software and associated documentation files (the "Software"), to deal 
 14  # in the Software without restriction, including without limitation the rights  
 15  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
 16  # copies of the Software, and to permit persons to whom the Software is  
 17  # furnished to do so, subject to the following conditions: 
 18  # 
 19  # The above copyright notice and this permission notice shall be included in     
 20  # all copies or substantial portions of the Software. 
 21  # 
 22  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
 23  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
 24  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
 25  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
 26  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
 27  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
 28  # SOFTWARE. 
 29  # 
 30   
 31  # Authors: 
 32  #   Michael Eddington (mike@phed.org) 
 33   
 34  # $Id: Peach.Generators.unicode-pysrc.html 1138 2008-08-16 19:39:03Z meddingt $ 
 35   
 36  import re, struct 
 37  from Peach import generator 
 38  from Peach.Generators.dictionary  import * 
 39  from Peach.Generators.static  import * 
 40   
 41  #__all__ = ['OverLongUtf8'] 
 42   
class GoodUnicode(generator.SimpleGenerator):
    '''
    Placeholder generator.  At present it only defines a static
    'PLACE HOLDER' value as its sub-generator and binds itself to a group.
    Presumably meant to produce well formed unicode data -- TODO confirm.
    '''

    # Class-level placeholder; nothing in this class replaces it.
    _generator = Static('PLACE HOLDER')

    def __init__(self, group, generator):
        '''
        @type    group: Group
        @param   group: Group to use
        @param   generator: Accepted but not used by this constructor.
                            Note that inside this method the name hides
                            the imported C{generator} module.
        '''
        # NOTE(review): C{Generator} is not imported by name in this
        # module; presumably one of the star imports provides it -- confirm.
        Generator.__init__(self)
        self.setGroup(group)
54
class BadUnicode(generator.SimpleGenerator):
    '''
    Placeholder generator.  At present it only defines a static
    'PLACE HOLDER' value as its sub-generator and binds itself to a group.
    Presumably meant to produce malformed unicode data -- TODO confirm.
    '''

    # Class-level placeholder; nothing in this class replaces it.
    _generator = Static('PLACE HOLDER')

    def __init__(self, group, generator):
        '''
        @type    group: Group
        @param   group: Group to use
        @param   generator: Accepted but not used by this constructor.
                            Note that inside this method the name hides
                            the imported C{generator} module.
        '''
        # NOTE(review): C{Generator} is not imported by name in this
        # module; presumably one of the star imports provides it -- confirm.
        Generator.__init__(self)
        self.setGroup(group)
66
class OverLongUtf8(generator.Generator):
    '''
    This generator creates overlong UTF-8 encodings.  First output
    is correct notation, then on each generation we perform a longer
    encoding of each character until we can do no more.

    The value of each character is spread over the sequence the way UTF-8
    requires: six bits per continuation byte (10xxxxxx), remaining high
    bits in the lead byte.  Characters below 0x40 therefore encode as
    <lead> 0x80 ... (0x80 | c), while 'A' (0x41) encodes as C1 81 rather
    than the malformed C0 C1.

    NOTE: Intended for ascii chars under 127 right now :/
    '''

    _data = None
    _size = 1               # number of bytes used per character right now
    _maxSize = 6            # longest sequence form produced
    _emptyByte = 0x80       # continuation byte marker (10xxxxxx)
    _start2 = 0xC0          # lead byte markers for 2..6 byte sequences
    _start3 = 0xE0
    _start4 = 0xF0
    _start5 = 0xF8
    _start6 = 0xFC
    _firstMask = 0xC0
    _lastMask = 0x80        # marker for the final continuation byte
    _payloadMask = 0x3F     # a continuation byte carries six payload bits

    def __init__(self, group, data):
        '''
        @type    group: Group
        @param   group: Group to use
        @type    data: Generator
        @param   data: Data to perform UTF-8 encoding on
        '''
        self.setGroup(group)
        self._data = data

    def next(self):
        '''
        Move on to the next longer encoding.

        @raise generator.GeneratorCompleted: once the sequence length
                                             would exceed C{_maxSize}.
        '''
        self._size += 1
        if self._size > self._maxSize:
            raise generator.GeneratorCompleted("OverLongUtf8")

    def _leadByte(self, size):
        '''
        Return the lead byte marker for a sequence of C{size} bytes, or
        None when C{size} is not one of the multi-byte forms (2..6).
        '''
        return {
            2: self._start2,
            3: self._start3,
            4: self._start4,
            5: self._start5,
            6: self._start6,
        }.get(size)

    def _encodeChar(self, c, lead, size):
        '''
        Encode the single character C{c} as a C{size} byte sequence that
        starts with the lead marker C{lead}.
        '''
        code = ord(c)

        # Final continuation byte takes the low six bits.
        tail = [self._lastMask | (code & self._payloadMask)]
        code >>= 6

        # Middle continuation bytes, built from least to most significant.
        for _ in range(size - 2):
            tail.append(self._emptyByte | (code & self._payloadMask))
            code >>= 6
        tail.reverse()

        # Whatever is left of the value belongs in the lead byte.
        return ''.join(["%c" % b for b in [lead | code] + tail])

    def getRawValue(self):
        '''
        @rtype: string
        @return: the wrapped generator's value unchanged while the size
                 is 1, otherwise every character re-encoded using the
                 current size.  An empty string is returned for a size
                 outside 1..6 (e.g. after C{next()} has signalled
                 completion).
        '''
        data = self._data.getValue()

        if self._size == 1:
            return data

        lead = self._leadByte(self._size)
        if lead is None:
            return ''

        # join instead of repeated += keeps this linear in len(data)
        return ''.join([self._encodeChar(c, lead, self._size) for c in data])

    def reset(self):
        '''
        Start over with the shortest (correct) encoding.  The wrapped
        data generator is left untouched.
        '''
        self._size = 1

    def unittest():
        '''
        Self test: prints a message for every mismatch and a final
        "Done" line.  Nothing is raised on failure.
        '''
        expected = [
            "%c" % (0x0A),
            "%c%c" % (0xC0, 0x8A),
            "%c%c%c" % (0xE0, 0x80, 0x8A),
            "%c%c%c%c" % (0xF0, 0x80, 0x80, 0x8A),
            "%c%c%c%c%c" % (0xF8, 0x80, 0x80, 0x80, 0x8A),
            "%c%c%c%c%c%c" % (0xFC, 0x80, 0x80, 0x80, 0x80, 0x8A),
        ]

        g = OverLongUtf8(None, Static("%c" % 0x0A))

        for index, want in enumerate(expected):
            if index > 0:
                g.next()
            if g.getRawValue() != want:
                print("OverLongUtf8 unittest failure %d" % (index + 1))

        # A character >= 0x40 needs its high bits moved out of the last
        # byte, otherwise the last byte is not a continuation byte.
        g = OverLongUtf8(None, Static("A"))
        g.next()
        if g.getRawValue() != "%c%c" % (0xC1, 0x81):
            print("OverLongUtf8 unittest failure 7")
        g.next()
        if g.getRawValue() != "%c%c%c" % (0xE0, 0x81, 0x81):
            print("OverLongUtf8 unittest failure 8")

        print("Done with OverLongUtf8 unittests")
    unittest = staticmethod(unittest)
170 171 172 173 # end 174