1
2 '''
3 Unicode generators
4
5 @author: Michael Eddington
6 @version: $Id: Peach.Generators.unicode-pysrc.html 1138 2008-08-16 19:39:03Z meddingt $
7 '''
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 import re, struct
37 from Peach import generator
38 from Peach.Generators.dictionary import *
39 from Peach.Generators.static import *
40
41
42
54
66
68 '''
69 This generator creates overlong UTF-8 encodings. First output
70 is correct notation, then on each generation we perform a longer
71 encoding of each character until we can do no more.
72
73 NOTE: Only supports ascii chars under 127 right now :/
74 '''
75
# Source generator whose value gets re-encoded; the constructor body
# overwrites this per instance with its `data` argument.
76 _data = None
# Sequence length (bytes emitted per input character) used by the encoder.
# 1 means the data is returned exactly as the source generator produced it.
77 _size = 1
# Longest sequence length the encoder has a branch for.
# NOTE(review): not referenced in the visible lines -- presumably checked by
# the stepping method that is missing from this listing; verify.
78 _maxSize = 6
# Continuation byte (binary 10000000) with no payload bits set; used as the
# filler between the lead byte and the final byte of a sequence.
79 _emptyByte = 0x80
# Lead bytes, payload bits all zero, for sequences of 2 to 6 bytes.
80 _start2 = 0xC0
81 _start3 = 0xE0
82 _start4 = 0xF0
83 _start5 = 0xF8
84 _start6 = 0xFC
# NOTE(review): not referenced in the visible lines.
85 _firstMask = 0xC0
# OR-ed onto ord(c) to build the final byte of every multi-byte sequence.
86 _lastMask = 0x80
87
# Constructor body. Its `def` line is absent from this listing; the docstring
# documents two arguments, `group` and `data`.
89 '''
90 @type group: Group
91 @param group: Group to use
92 @type data: Generator
93 @param data: Data to perform UTF-8 encoding on
94 '''
# setGroup() is not defined in the visible lines -- presumably inherited from
# the generator base class; verify.
95 self.setGroup(group)
# Keep the source generator; the encoder later reads it through
# self._data.getValue().
96 self._data = data
97
102
def getRawValue(self):
    '''
    Encode the source generator's current value with the UTF-8 sequence
    length selected by C{self._size}.

    With a size of 1 the data is returned untouched. For sizes 2 through 6
    every character is expanded into a sequence of exactly that many bytes:
    a lead byte (C{_start2} .. C{_start6}) followed by continuation bytes
    that each carry six bits of the character's code point. For ASCII
    input every such sequence is an overlong encoding.

    Earlier revisions OR-ed the complete C{ord(c)} into the last byte.
    That only works for characters below 0x40; anything larger (all ASCII
    letters, for instance) spilled into the two marker bits and produced a
    byte that is not a continuation byte at all. The bits above the low six
    are now carried by the preceding byte(s). Output for characters below
    0x40 is unchanged.

    NOTE: code points needing more payload bits than the selected length
    provides are not validated.

    @rtype: string
    @return: Data re-encoded at the current sequence length, or an empty
             string when the size is not between 1 and 6
    '''
    data = self._data.getValue()
    size = self._size

    # Length 1 is the ordinary encoding, so the data goes out as-is.
    if size == 1:
        return data

    leadBytes = {
        2: self._start2,
        3: self._start3,
        4: self._start4,
        5: self._start5,
        6: self._start6,
    }
    if size not in leadBytes:
        # Same result the old if/elif chain gave for an unknown length.
        return ''
    lead = leadBytes[size]

    encoded = []
    for c in data:
        code = ord(c)

        # Peel the code point apart six bits at a time, lowest bits first.
        # The final byte is tagged with _lastMask, the others with
        # _emptyByte, mirroring how the two constants were used before.
        tail = []
        marker = self._lastMask
        for _ in range(size - 1):
            tail.append(marker | (code & 0x3F))
            code >>= 6
            marker = self._emptyByte
        tail.reverse()

        # Whatever bits remain belong in the lead byte's payload.
        encoded.append("%c" % (lead | code))
        encoded.extend("%c" % b for b in tail)

    return ''.join(encoded)
137
140
def unittest():
    '''
    Self-check for OverLongUtf8: encode a single line feed (0x0A) at every
    sequence length from 1 to 6 and compare against hand-written bytes.

    A mismatch is reported on stdout and the run continues; nothing is
    raised and nothing is returned. The messages are printed with the
    single-argument call form of print, which produces the same output on
    Python 2 and Python 3.
    '''
    expected = [
        "%c" % (0x0A),
        "%c%c" % (0xC0, 0x8A),
        "%c%c%c" % (0xE0, 0x80, 0x8A),
        "%c%c%c%c" % (0xF0, 0x80, 0x80, 0x8A),
        "%c%c%c%c%c" % (0xF8, 0x80, 0x80, 0x80, 0x8A),
        "%c%c%c%c%c%c" % (0xFC, 0x80, 0x80, 0x80, 0x80, 0x8A),
    ]

    g = OverLongUtf8(None, Static("%c" % 0x0A))

    for index, want in enumerate(expected):
        # The generator starts at length 1, so step it only between checks.
        if index:
            g.next()
        if g.getRawValue() != want:
            print("OverLongUtf8 unittest failure %d" % (index + 1))

    print("Done with OverLongUtf8 unittests")
unittest = staticmethod(unittest)
170
171
172
173
174