forked from proycon/pynlpl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommon.py
136 lines (116 loc) · 4.19 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python
#-*- coding:utf-8 -*-
###############################################################9
# PyNLPl - Common functions
# by Maarten van Gompel
# Centre for Language Studies
# Radboud University Nijmegen
# http://www.github.com/proycon/pynlpl
# proycon AT anaproy DOT nl
#
# Licensed under GPLv3
#
# This contains very common functions and language extensions
#
###############################################################
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
import datetime
from sys import stderr, version
## From http://code.activestate.com/recipes/413486/ (r7)
def Enum(*names):
##assert names, "Empty enums are not supported" # <- Don't like empty enums? Uncomment!
class EnumClass(object):
__slots__ = names
def __iter__(self): return iter(constants)
def __len__(self): return len(constants)
def __getitem__(self, i): return constants[i]
def __repr__(self): return 'Enum' + str(names)
def __str__(self): return 'enum ' + str(constants)
class EnumValue(object):
__slots__ = ('__value')
def __init__(self, value): self.__value = value
Value = property(lambda self: self.__value)
EnumType = property(lambda self: EnumType)
def __hash__(self): return hash(self.__value)
def __cmp__(self, other):
# C fans might want to remove the following assertion
# to make all enums comparable by ordinal value {;))
assert self.EnumType is other.EnumType, "Only values from the same enum are comparable"
return cmp(self.__value, other.__value)
def __invert__(self): return constants[maximum - self.__value]
def __bool__(self): return bool(self.__value)
def __nonzero__(self): return bool(self.__value) #Python 2.x
def __repr__(self): return str(names[self.__value])
maximum = len(names) - 1
constants = [None] * len(names)
for i, each in enumerate(names):
val = EnumValue(i)
setattr(EnumClass, each, val)
constants[i] = val
constants = tuple(constants)
EnumType = EnumClass()
return EnumType
def u(s, encoding = 'utf-8', errors='strict'):
#ensure s is properly unicode.. wrapper for python 2.6/2.7,
if version < '3':
#ensure the object is unicode
if isinstance(s, unicode):
return s
else:
return unicode(s, encoding,errors=errors)
else:
#will work on byte arrays
if isinstance(s, str):
return s
else:
return str(s,encoding,errors=errors)
def b(s):
#ensure s is bytestring
if version < '3':
#ensure the object is unicode
if isinstance(s, str):
return s
else:
return s.encode('utf-8')
else:
#will work on byte arrays
if isinstance(s, bytes):
return s
else:
return s.encode('utf-8')
def isstring(s): #Is this a proper string?
return isinstance(s, str) or (version < '3' and isinstance(s, unicode))
def log(msg, **kwargs):
"""Generic log method. Will prepend timestamp.
Keyword arguments:
system - Name of the system/module
indent - Integer denoting the desired level of indentation
streams - List of streams to output to
stream - Stream to output to (singleton version of streams)
"""
if 'debug' in kwargs:
if 'currentdebug' in kwargs:
if kwargs['currentdebug'] < kwargs['debug']:
return False
else:
return False #no currentdebug passed, assuming no debug mode and thus skipping message
s = "[" + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "] "
if 'system' in kwargs:
s += "[" + system + "] "
if 'indent' in kwargs:
s += ("\t" * int(kwargs['indent']))
s += u(msg)
if s[-1] != '\n':
s += '\n'
if 'streams' in kwargs:
streams = kwargs['streams']
elif 'stream' in kwargs:
streams = [kwargs['stream']]
else:
streams = [stderr]
for stream in streams:
stream.write(s)
return s