-
Notifications
You must be signed in to change notification settings - Fork 594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cleanup imports in common.py, lstm.py, extract exceptions #154
Changes from all commits
3269303
ba323ce
db719a2
101791b
1039d5e
9405c68
43a9940
cf050f6
b4381aa
51ffbb6
d23ee42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,100 +11,33 @@ | |
import sys | ||
import sysconfig | ||
import unicodedata | ||
import warnings | ||
import inspect | ||
import glob | ||
import cPickle | ||
from ocrolib.exceptions import (BadClassLabel, BadInput, FileNotFound, | ||
OcropusException) | ||
|
||
import numpy | ||
from numpy import * | ||
from numpy import (amax, amin, array, bitwise_and, clip, dtype, mean, minimum, | ||
nan, sin, sqrt, zeros) | ||
import pylab | ||
from pylab import imshow | ||
from scipy.ndimage import morphology,measurements | ||
from pylab import (clf, cm, ginput, gray, imshow, ion, subplot, where) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've got a branch that removes […] (inline code lost in extraction).
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You mean https://github.com/QuLogic/ocropy/commits/standard-import? Sure, open a PR with the pertinent commits; it looks really helpful, and I'll happily merge/cherry-pick.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that's the one; though I just noticed that it's based on the Python 3 work, so it'll be a pain to cherry-pick. Should get that in ASAP and then the rest would be easier. |
||
from scipy.ndimage import morphology, measurements | ||
import PIL | ||
|
||
from default import getlocal | ||
from toplevel import * | ||
from toplevel import (checks, ABINARY2, AINT2, AINT3, BOOL, DARKSEG, GRAYSCALE, | ||
LIGHTSEG, LINESEG, PAGESEG) | ||
import chars | ||
import codecs | ||
import ligatures | ||
import lstm | ||
import morph | ||
import multiprocessing | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should move up to first block (standard library imports). |
||
|
||
################################################################ | ||
### exceptions | ||
################################################################ | ||
|
||
class OcropusException(Exception):
    """Base class of the OCRopus exception hierarchy.

    ``trace`` is a class-level flag that every subclass sets to 1 or 0.
    NOTE(review): presumably it tells the reporting code whether a
    traceback should be shown -- confirm against the code that reads it,
    which is not visible here.
    """
    trace = 1

    def __init__(self, *args, **kw):
        # Arguments are forwarded unchanged to the built-in Exception.
        Exception.__init__(self, *args, **kw)


class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented."""
    trace = 1

    def __init__(self, s):
        # ``s`` is accepted but not used: the message is the name of the
        # function that raised, read from the caller's stack frame.
        Exception.__init__(self, inspect.stack()[1][3])


class Internal(OcropusException):
    """Exception raised for an internal error.

    (The original text here was a copy of the ``Unimplemented``
    description.)
    """
    trace = 1

    def __init__(self, s):
        # Same convention as Unimplemented: ``s`` is ignored and the
        # message is the name of the calling function.
        Exception.__init__(self, inspect.stack()[1][3])


class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    Keyword arguments are kept on ``self.context`` and appended to the
    message as ``key=<summary of value>`` pairs.
    """
    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts += ["%s=%s" % (k, summary(kw[k])) for k in kw]
        Exception.__init__(self, " ".join(parts))


class Warning(OcropusException):
    """OCRopus warning condition.

    NOTE: the name shadows the built-in ``Warning`` in any namespace that
    imports it; it is kept because callers refer to it by this name.
    """
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args, **kw)


class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""
    trace = 0

    def __init__(self, s):
        Exception.__init__(self, s)


class BadImage(OcropusException):
    """Exception for an unusable image.

    Keyword arguments are accepted but discarded; only the positional
    arguments reach the base class.
    """
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args)


class BadInput(OcropusException):
    """Exception for unusable input."""
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args, **kw)


class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing.

    The offending path is available as ``self.fname``.
    """
    trace = 0

    def __init__(self, fname):
        # Initialize the base explicitly so ``args`` is set by design
        # rather than as a side effect of object construction.
        OcropusException.__init__(self, fname)
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
import sl | ||
|
||
pickle_mode = 2 | ||
|
||
def deprecated(f):
    """Decorator: print a deprecation notice the first time `f` is called.

    The previous version returned the inner factory instead of a wrapper
    (so a decorated function stopped working) and assigned `warned` inside
    the closure without declaring it, which raises UnboundLocalError.

    Args:
        f: the callable being deprecated.

    Returns:
        A wrapper that forwards all arguments to `f` and returns its
        result, printing the notice on the first call only.
    """
    import functools

    # One-element list so the closure can flip the flag without `nonlocal`
    # (the surrounding file still targets Python 2, e.g. `import cPickle`).
    warned = [False]

    @functools.wraps(f)
    def _wrapper(*args, **kw):
        if not warned[0]:
            print(f, "has been DEPRECATED")
            warned[0] = True
        return f(*args, **kw)

    return _wrapper
|
||
|
||
|
||
################################################################ | ||
# text normalization | ||
|
@@ -466,10 +399,10 @@ def extractMasked(self,image,index,grow=0,bg=None,margin=0,dtype=None): | |
mh,mw = mask.shape | ||
box = self.bbox(index) | ||
r0,c0,r1,c1 = box | ||
subimage = improc.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg) | ||
subimage = sl.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg) | ||
return where(mask,subimage,bg) | ||
|
||
|
||
|
||
################################################################ | ||
### Object reading and writing | ||
|
@@ -517,7 +450,7 @@ class names that have changed.""" | |
unpickler.find_global = unpickle_find_global | ||
return unpickler.load() | ||
|
||
|
||
|
||
################################################################ | ||
### Simple record object. | ||
|
@@ -581,16 +514,6 @@ def check_valid_class_label(s): | |
else: | ||
raise BadClassLabel(s) | ||
|
||
def summary(x):
    """Summarize a value as a short string (for display and debugging).

    - numpy arrays are described by shape and dtype instead of contents;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by "...";
    - lists longer than 10 items are cut to their first 10 items and
      followed by "...";
    - anything else is rendered with ``str``.

    The previous version appended "..." to long strings and lists but
    still emitted the whole value, so nothing was actually shortened.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as a single value.
        return '%s...' % (x[:10],)
    return str(x)
|
||
################################################################ | ||
### file name manipulation | ||
################################################################ | ||
|
@@ -824,19 +747,6 @@ def quick_check_line_components(line_bin,dpi): | |
there is probably something wrong.""" | ||
return 1.0 | ||
|
||
def deprecated(func):
    """Decorator that flags `func` as deprecated.

    Every call through the returned wrapper first issues a
    DeprecationWarning naming the function (with ``stacklevel=2``) and
    then delegates to `func`, returning its result.  The wrapper takes
    over the name, docstring and attribute dictionary of `func`.
    """
    def wrapper(*args, **kwargs):
        text = "Call to deprecated function %s." % func.__name__
        warnings.warn(text, category=DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    # Carry the wrapped function's identity over by hand.
    for attr in ("__name__", "__doc__"):
        setattr(wrapper, attr, getattr(func, attr))
    wrapper.__dict__.update(func.__dict__)
    return wrapper
|
||
################################################################ | ||
### conversion functions | ||
################################################################ | ||
|
@@ -904,67 +814,6 @@ def obinfo(ob): | |
result += str(ob.shape) | ||
return result | ||
|
||
def save_component(file,object,verbose=0,verify=0):
    """Write `object` to the path `file`, picking the format by object kind.

    Dispatch order:
      1. objects exposing a `save_component` method save themselves;
      2. `CommonComponent` wrappers that have a `comp` attribute are
         written through `ocropus.save_component` using the wrapped `comp`;
      3. objects whose type lives in module `ocropus` are written through
         `ocropus.save_component` directly;
      4. everything else is pickled with protocol `pickle_mode`.

    With `verbose` set, the pickle branch first prints a header and one
    line per entry of `object.__dict__` (formatted by `obinfo`).  With
    `verify` set, the file is unpickled again right after it is written.
    """
    # 1. The object knows how to persist itself.
    if hasattr(object, "save_component"):
        object.save_component(file)
        return

    # 2. Wrapper around a native component.
    # FIXME -- get rid of this eventually
    wraps_native = (object.__class__.__name__ == "CommonComponent"
                    and hasattr(object, "comp"))
    if wraps_native:
        import ocropus
        ocropus.save_component(file, object.comp)
        return

    # 3. Native component itself.
    if type(object).__module__ == "ocropus":
        import ocropus
        ocropus.save_component(file, object)
        return

    # 4. Plain Python object: pickle it.
    if verbose:
        print("[save_component]")
        for key, value in object.__dict__.items():
            print(":", key, obinfo(value))
    with open(file, "wb") as out:
        pickle.dump(object, out, pickle_mode)

    if not verify:
        return
    if verbose:
        print("[trying to read it again]")
    with open(file, "rb") as saved:
        pickle.load(saved)
|
||
def load_component(file):
    """Load a component named by `file`.

    The argument is interpreted as follows:
      - "=<expr>": the text after "=" is handed to `pyconstruct`
        (e.g. "=package.ObjectName(arg1,arg2)");
      - "@<path>": the "@" is stripped and the rest is used as the path;
      - otherwise `file` is used as the path directly.

    The first 128 characters of the file decide the format: old-style
    linerec and cmodel files (both starting with "<object>") are loaded
    through `ocropus` after a warning; anything else goes to `load_object`.
    """
    prefix = file[0]
    if prefix == "=":
        return pyconstruct(file[1:])
    if prefix == "@":
        file = file[1:]

    # FIXME -- get rid of this eventually
    with open(file, "r") as stream:
        head = stream.read(128)

    if head.startswith("<object>\nlinerec\n"):
        # FIXME -- get rid of this eventually
        warnings.warn("loading old-style linerec: %s" % file)
        recognizer = RecognizeLine()
        import ocropus
        recognizer.comp = ocropus.load_IRecognizeLine(file)
        return recognizer

    if head.startswith("<object>"):
        # FIXME -- get rid of this eventually
        warnings.warn("loading old-style cmodel: %s" % file)
        import ocroold
        model = ocroold.Model()
        import ocropus
        model.comp = ocropus.load_IModel(file)
        return model

    return load_object(file)
|
||
def binarize_range(image,dtype='B',threshold=0.5): | ||
"""Binarize an image by its range.""" | ||
|
@@ -973,39 +822,6 @@ def binarize_range(image,dtype='B',threshold=0.5): | |
if dtype=='B': scale = 255 | ||
return array(scale*(image>threshold),dtype=dtype) | ||
|
||
def draw_pseg(pseg,axis=None):
    """Outline the regions of the page segmentation `pseg` in red.

    Regions come from an `ocropy.RegionExtractor` fed with `pseg`; index 0
    is skipped.  Each rectangle is anchored at (x0, h-y1-1), where h is
    `pseg.dim(1)`, i.e. the vertical coordinate is flipped before drawing.
    When `axis` is None a new `subplot(111)` is used.
    """
    target = subplot(111) if axis is None else axis
    # NOTE(review): presumably the page height -- confirm what dim(1) means.
    h = pseg.dim(1)
    extractor = ocropy.RegionExtractor()
    extractor.setPageLines(pseg)
    for idx in range(1, extractor.length()):
        x0 = extractor.x0(idx)
        y0 = extractor.y0(idx)
        x1 = extractor.x1(idx)
        y1 = extractor.y1(idx)
        outline = patches.Rectangle((x0, h - y1 - 1), x1 - x0, y1 - y0,
                                    edgecolor="red", fill=0)
        target.add_patch(outline)
|
||
def draw_aligned(result,axis=None):
    """Placeholder for drawing an aligned recognition result.

    This function is not implemented: it raises immediately, whatever
    arguments it is given.  The statements that used to follow the raise
    could never run and have been removed.

    Raises:
        Unimplemented: always.
    """
    raise Unimplemented("FIXME draw_aligned")
|
||
def plotgrid(data,d=10,shape=(30,30)): | ||
"""Plot a list of images on a grid.""" | ||
ion() | ||
|
@@ -1024,13 +840,13 @@ def showrgb(r,g=None,b=None): | |
imshow(array([r,g,b]).transpose([1,2,0])) | ||
|
||
def showgrid(l,cols=None,n=400,titles=None,xlabels=None,ylabels=None,**kw): | ||
if "cmap" not in kw: kw["cmap"] = pylab.cm.gray | ||
if "cmap" not in kw: kw["cmap"] = cm.gray | ||
if "interpolation" not in kw: kw["interpolation"] = "nearest" | ||
n = minimum(n,len(l)) | ||
if cols is None: cols = int(sqrt(n)) | ||
rows = (n+cols-1)//cols | ||
for i in range(n): | ||
pylab.xticks([]); pylab.yticks([]) | ||
pylab.xticks([]) ;pylab.yticks([]) | ||
pylab.subplot(rows,cols,i+1) | ||
pylab.imshow(l[i],**kw) | ||
if titles is not None: pylab.title(str(titles[i])) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import inspect | ||
import numpy | ||
|
||
def summary(x):
    """Summarize a value as a short string (for display and debugging).

    - numpy arrays are described by shape and dtype instead of contents;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by "...";
    - lists longer than 10 items are cut to their first 10 items and
      followed by "...";
    - anything else is rendered with ``str``.

    The previous version appended "..." to long strings and lists but
    still emitted the whole value, so nothing was actually shortened.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as a single value.
        return '%s...' % (x[:10],)
    return str(x)
|
||
|
||
################################################################ | ||
### Ocropy exceptions | ||
################################################################ | ||
|
||
class OcropusException(Exception):
    """Base class of the OCRopus exception hierarchy.

    ``trace`` is a class-level flag that every subclass sets to 1 or 0.
    NOTE(review): presumably it tells the reporting code whether a
    traceback should be shown -- confirm against the code that reads it,
    which is not visible here.
    """
    trace = 1

    def __init__(self, *args, **kw):
        # Arguments are forwarded unchanged to the built-in Exception.
        Exception.__init__(self, *args, **kw)


class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented."""
    trace = 1

    def __init__(self, s):
        # ``s`` is accepted but not used: the message is the name of the
        # function that raised, read from the caller's stack frame.
        Exception.__init__(self, inspect.stack()[1][3])


class Internal(OcropusException):
    """Exception raised for an internal error.

    (The original text here was a copy of the ``Unimplemented``
    description.)
    """
    trace = 1

    def __init__(self, s):
        # Same convention as Unimplemented: ``s`` is ignored and the
        # message is the name of the calling function.
        Exception.__init__(self, inspect.stack()[1][3])


class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    Keyword arguments are kept on ``self.context`` and appended to the
    message as ``key=<summary of value>`` pairs.
    """
    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts += ["%s=%s" % (k, summary(kw[k])) for k in kw]
        Exception.__init__(self, " ".join(parts))


class Warning(OcropusException):
    """OCRopus warning condition.

    NOTE: the name shadows the built-in ``Warning`` in any namespace that
    imports it; it is kept because callers refer to it by this name.
    """
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args, **kw)


class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""
    trace = 0

    def __init__(self, s):
        Exception.__init__(self, s)


class BadImage(OcropusException):
    """Exception for an unusable image.

    Keyword arguments are accepted but discarded; only the positional
    arguments reach the base class.
    """
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args)


class BadInput(OcropusException):
    """Exception for unusable input."""
    trace = 0

    def __init__(self, *args, **kw):
        OcropusException.__init__(self, *args, **kw)


class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing.

    The offending path is available as ``self.fname``.
    """
    trace = 0

    def __init__(self, fname):
        # Initialize the base explicitly so ``args`` is set by design
        # rather than as a side effect of object construction.
        OcropusException.__init__(self, fname)
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should move down to third block (first-party imports).