1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """
18 Simple transliteration
19 """
20
21 __id__ = __revision__ = "$Id: translit.py 102 2007-07-12 12:33:36Z the.pythy $"
22 __url__ = "$URL: https://pythy.googlecode.com/svn/tags/pytils/0_2_2/pytils/translit.py $"
23
24 import re
25 from pytils import utils
26
# Transliteration table: a sequence of (russian, latin) pairs.
#
# Row order is significant. The transliteration functions of this module walk
# the table from top to bottom and apply one ``replace`` per row, therefore:
#   * multi-letter latin sequences come before single letters, so that in the
#     latin -> russian direction "Sch" is consumed before "S", "c" and "h";
#   * when a russian symbol is listed twice (title-case and upper-case latin
#     variants), only the first row can fire in the russian -> latin
#     direction, while both rows are usable in the opposite direction.
TRANSTABLE = (
    # punctuation and typographic symbols
    (u"'", u"'"),
    (u'"', u'"'),
    (u"‘", u"'"),
    (u"’", u"'"),
    (u"«", u'"'),
    (u"»", u'"'),
    (u"–", u"-"),
    (u"…", u"..."),
    (u"№", u"#"),

    # --- upper case ---
    # three-letter replacements
    (u"Щ", u"Sch"),
    # russian -> latin always yields "Sch" (first matching row);
    # latin -> russian accepts both "Sch" and "SCH"
    (u"Щ", u"SCH"),

    # two-letter replacements (title-case variant first, then upper-case)
    (u"Ё", u"Yo"),
    (u"Ё", u"YO"),
    (u"Ж", u"Zh"),
    (u"Ж", u"ZH"),
    (u"Ц", u"Ts"),
    (u"Ц", u"TS"),
    (u"Ч", u"Ch"),
    (u"Ч", u"CH"),
    (u"Ш", u"Sh"),
    (u"Ш", u"SH"),
    (u"Ы", u"Yi"),
    (u"Ы", u"YI"),
    (u"Ю", u"Yu"),
    (u"Ю", u"YU"),
    (u"Я", u"Ya"),
    (u"Я", u"YA"),

    # single-letter replacements
    (u"А", u"A"),
    (u"Б", u"B"),
    (u"В", u"V"),
    (u"Г", u"G"),
    (u"Д", u"D"),
    (u"Е", u"E"),
    (u"З", u"Z"),
    (u"И", u"I"),
    (u"Й", u"J"),
    (u"К", u"K"),
    (u"Л", u"L"),
    (u"М", u"M"),
    (u"Н", u"N"),
    (u"О", u"O"),
    (u"П", u"P"),
    (u"Р", u"R"),
    (u"С", u"S"),
    (u"Т", u"T"),
    (u"У", u"U"),
    (u"Ф", u"F"),
    (u"Х", u"H"),
    (u"Э", u"E"),
    (u"Ъ", u"`"),
    (u"Ь", u"'"),

    # --- lower case ---
    # three-letter replacements
    (u"щ", u"sch"),

    # two-letter replacements
    (u"ё", u"yo"),
    (u"ж", u"zh"),
    (u"ц", u"ts"),
    (u"ч", u"ch"),
    (u"ш", u"sh"),
    (u"ы", u"yi"),
    (u"ю", u"yu"),
    (u"я", u"ya"),

    # single-letter replacements
    (u"а", u"a"),
    (u"б", u"b"),
    (u"в", u"v"),
    (u"г", u"g"),
    (u"д", u"d"),
    (u"е", u"e"),
    (u"з", u"z"),
    (u"и", u"i"),
    (u"й", u"j"),
    (u"к", u"k"),
    (u"л", u"l"),
    (u"м", u"m"),
    (u"н", u"n"),
    (u"о", u"o"),
    (u"п", u"p"),
    (u"р", u"r"),
    (u"с", u"s"),
    (u"т", u"t"),
    (u"у", u"u"),
    (u"ф", u"f"),
    (u"х", u"h"),
    (u"э", u"e"),
    (u"ъ", u"`"),
    (u"ь", u"'"),

    # identity rows: the lower-case latin letters that no single-letter row
    # above produces, plus the digits, so that ALPHABET contains every
    # lower-case latin letter and every digit
    (u"c", u"c"),
    (u"q", u"q"),
    (u"y", u"y"),
    (u"x", u"x"),
    (u"w", u"w"),
    (u"1", u"1"),
    (u"2", u"2"),
    (u"3", u"3"),
    (u"4", u"4"),
    (u"5", u"5"),
    (u"6", u"6"),
    (u"7", u"7"),
    (u"8", u"8"),
    (u"9", u"9"),
    (u"0", u"0"),
)

# First (russian side) and second (latin side) columns of TRANSTABLE,
# in table order and with duplicates kept.
RU_ALPHABET = [x[0] for x in TRANSTABLE]
EN_ALPHABET = [x[1] for x in TRANSTABLE]
# Every symbol (or multi-letter sequence) known to the table, both columns.
ALPHABET = RU_ALPHABET + EN_ALPHABET
147
148
def translify(in_string):
    """
    Translify russian text

    Every row of L{TRANSTABLE} is applied in table order: each occurrence
    of the row's first element is replaced by its second element. The
    result is then converted to C{str}.

    @param in_string: input string
    @type in_string: C{unicode}

    @return: transliterated string
    @rtype: C{str}

    @raise TypeError: when in_string is not C{unicode}
    @raise ValueError: when string doesn't transliterate completely
    """
    # NOTE(review): check_type is given the argument *name*, not its value;
    # presumably it looks the variable up in this frame -- keep the call
    # directly in this function and do not rename the parameter.
    utils.check_type('in_string', unicode)

    translit = in_string
    for symb_in, symb_out in TRANSTABLE:
        translit = translit.replace(symb_in, symb_out)

    # Anything the table did not cover is still non-ascii at this point and
    # makes the conversion to str fail.
    try:
        translit = str(translit)
    except UnicodeEncodeError:
        raise ValueError("Unicode string doesn't transliterate completely, "
                         "is it russian?")

    return translit
175
176
def detranslify(in_string):
    """
    Detranslify

    Reverse direction of the table: every row of L{TRANSTABLE} is applied
    in table order, replacing each occurrence of the row's second (latin)
    element by its first (russian) element.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: detransliterated string
    @rtype: C{unicode}

    @raise TypeError: when in_string is neither C{str} nor C{unicode}
    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    # NOTE(review): check_type is given the argument *name*, not its value;
    # presumably it looks the variable up in this frame -- keep the call
    # directly in this function and do not rename the parameter.
    utils.check_type('in_string', basestring)

    # All further work is done on unicode; a non-ascii byte string cannot be
    # converted implicitly and is rejected.
    try:
        russian = unicode(in_string)
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")

    # Rows are (russian, latin), so the names are swapped on purpose:
    # the latin side is searched for, the russian side is substituted.
    for symb_out, symb_in in TRANSTABLE:
        russian = russian.replace(symb_in, symb_out)

    return russian
204
205
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    The input is lower-cased, ampersands (plain or HTML-escaped) become
    the word "and", runs of whitespace and hyphens collapse into a single
    hyphen, symbols missing from L{ALPHABET} are dropped, and the rest is
    passed through L{translify}.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise TypeError: when in_string isn't C{unicode} or C{str}
    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    # NOTE(review): check_type is given the argument *name*, not its value;
    # presumably it looks the variable up in this frame -- keep the call
    # directly in this function and do not rename the parameter.
    utils.check_type('in_string', basestring)
    try:
        u_in_string = unicode(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")

    # convert "&" to "and"; the escaped form has to be tried first, otherwise
    # "&amp;" would leave a stray "amp" in the slug
    u_in_string = re.sub(r'&amp;|&', ' and ', u_in_string)

    # collapse every run of whitespace/hyphens into one hyphen
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)

    # drop symbols that the transliteration table does not know
    u_in_string = u''.join([symb for symb in u_in_string if symb in ALPHABET])

    out_string = translify(u_in_string)

    # remove what is left of non-word symbols (quotes, "#", dots, ...)
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
236
237
def dirify(in_string):
    """
    Alias for L{slugify}

    @param in_string: input string, handed to L{slugify} unchanged

    @return: the value returned by L{slugify} for in_string
    """
    # The result has to be returned explicitly; a bare call would make the
    # alias always yield None.
    return slugify(in_string)
243