-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtxt.py
56 lines (43 loc) · 1.33 KB
/
txt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#sdobrev 2005
'text encodings, escaping, whitespace stripping'
# Encoding tried first when decoding byte strings to unicode (TXT), and the
# fallback when encoding unicode to byte strings (XTX).
MODEL_ENCODING = 'utf-8'
# Encoding tried first when encoding unicode to byte strings (XTX), and the
# fallback when decoding byte strings to unicode (TXT).
PREFERRED_ENCODING = 'cp1251'
# NOTE(review): the original author was unsure which of the two constants should play which role - confirm.
def TXT(txt):  # to unicode
    """Return txt as a unicode object.

    A value that is already unicode is returned unchanged.  Anything else
    is decoded with MODEL_ENCODING; if that raises UnicodeDecodeError it is
    decoded with PREFERRED_ENCODING instead.  An error raised by the
    fallback decode propagates to the caller.
    """
    if not isinstance(txt, unicode):
        try:
            txt = unicode(txt, MODEL_ENCODING)
        except UnicodeDecodeError:
            txt = unicode(txt, PREFERRED_ENCODING)
    return txt
def TXTL(lst):
    """Return a new list holding TXT(item) for every item of lst, in order."""
    return list(map(TXT, lst))
# Short alias: _(x) is the same call as TXT(x).
_ = TXT
def u2s(value):
    """Return value as a UTF-8 byte string if it is unicode.

    Any value that is not a unicode object is returned unchanged.
    """
    return value.encode('utf-8') if isinstance(value, unicode) else value
def XTX(txt):  # from unicode
    """Encode a unicode object to a byte string.

    PREFERRED_ENCODING is tried first; if txt contains characters that
    encoding cannot represent, MODEL_ENCODING is used instead.

    txt must be a unicode object (checked with an assert).
    Returns the encoded byte string.
    """
    assert isinstance(txt, unicode)
    try:
        return txt.encode(PREFERRED_ENCODING)
    except UnicodeError:
        # unicode.encode() signals unencodable characters with
        # UnicodeEncodeError; the former "except UnicodeDecodeError" never
        # matched it, so this fallback was unreachable.  UnicodeError is the
        # common parent, so the previously caught exception is still covered.
        return txt.encode(MODEL_ENCODING)
#############
from xml.sax.saxutils import escape #, quoteattr
def escape_xml_attrs(kv_tuples):
    """Yield each (key, value) pair of kv_tuples with string values escaped.

    Values that are instances of basestring are passed through
    escape_xml_attr(); every other value is yielded unchanged.  Keys are
    never modified.  This is a generator.
    """
    for name, val in kv_tuples:
        yield name, (escape_xml_attr(val) if isinstance(val, basestring) else val)
def escape_xml_text(t):
    """Return t escaped with xml.sax.saxutils.escape() default rules."""
    escaped = escape(t)
    return escaped
# Extra replacements applied to attribute values on top of escape()'s
# defaults.  Newline, carriage return and tab are written as numeric
# character references and the double quote as &quot;.
# NOTE(review): the table was restored from a corrupted line in which the
# references appeared already decoded (leaving an unterminated string
# literal); it matches the table used by xml.sax.saxutils.quoteattr - confirm.
_escs = {'\n': '&#10;', '\r': '&#13;', '\t': '&#9;', '"': '&quot;'}


def escape_xml_attr(text):
    """Return text escaped for use as an XML attribute value.

    Applies xml.sax.saxutils.escape() with the extra replacements in _escs.
    The result is not wrapped in quotes.
    """
    return escape(text, _escs)
###############
import re
# Characters treated as whitespace by the helpers below: newline, carriage
# return, space, tab and 0xA0 (presumably the non-breaking space - confirm).
whitespace = '\n\r \t\xa0'


def strip(x):
    """Return x with leading and trailing `whitespace` characters removed."""
    return x.strip(whitespace)


# Matches one or more consecutive `whitespace` characters.
re_spc = re.compile('[%s]+' % whitespace)


def slim1(x):
    """Return x with every run of `whitespace` characters replaced by one space."""
    return re_spc.sub(' ', x)


def slim(x):
    """Collapse whitespace runs in x, then strip whitespace from both ends."""
    collapsed = slim1(x)
    return strip(collapsed)
# vim:ts=4:sw=4:expandtab