Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup imports in common.py, lstm.py, extract exceptions #154

Merged
merged 11 commits into from
Jan 23, 2017
212 changes: 14 additions & 198 deletions ocrolib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,100 +11,33 @@
import sys
import sysconfig
import unicodedata
import warnings
import inspect
import glob
import cPickle
from ocrolib.exceptions import (BadClassLabel, BadInput, FileNotFound,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should move down to third block (first-party imports).

OcropusException)

import numpy
from numpy import *
from numpy import (amax, amin, array, bitwise_and, clip, dtype, mean, minimum,
nan, sin, sqrt, zeros)
import pylab
from pylab import imshow
from scipy.ndimage import morphology,measurements
from pylab import (clf, cm, ginput, gray, imshow, ion, subplot, where)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've got a branch that removes pylab imports; should I open a PR for that?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean https://github.com/QuLogic/ocropy/commits/standard-import? Sure, open a PR with the pertinent commits, looks really helpful, I'll happily merge/cherry-pick.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the one; though I just noticed that it's based on the Python 3 work, so it'll be a pain to cherry-pick. Should get that in ASAP and then the rest would be easier.

from scipy.ndimage import morphology, measurements
import PIL

from default import getlocal
from toplevel import *
from toplevel import (checks, ABINARY2, AINT2, AINT3, BOOL, DARKSEG, GRAYSCALE,
LIGHTSEG, LINESEG, PAGESEG)
import chars
import codecs
import ligatures
import lstm
import morph
import multiprocessing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should move up to first block (standard library imports).


################################################################
### exceptions
################################################################

class OcropusException(Exception):
    """Root of the OCRopus exception hierarchy.

    Positional arguments are handled by the inherited Exception
    initializer (the former pass-through ``__init__`` added nothing).
    """
    # NOTE(review): `trace` is not read anywhere in the visible code;
    # presumably it tells a top-level handler whether a traceback should
    # be shown -- confirm against the code that consumes it.
    trace = 1


class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented."""
    trace = 1

    def __init__(self, s):
        # stack()[0] is this __init__; stack()[1] is the code that is
        # constructing the exception. context=0 skips source-line lookup.
        caller = inspect.stack(0)[1][3]
        # The message used to be dropped; keep the caller name first and
        # append the explanation that was passed in.
        OcropusException.__init__(self, "%s: %s" % (caller, s))


class Internal(OcropusException):
    """Exception raised for internal errors."""
    trace = 1

    def __init__(self, s):
        # Same layout as Unimplemented: "<calling function>: <message>".
        caller = inspect.stack(0)[1][3]
        OcropusException.__init__(self, "%s: %s" % (caller, s))


class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    Keyword arguments are kept in ``self.context`` and appended to the
    message as ``key=summary(value)`` pairs.
    """
    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts += ["%s=%s" % (k, summary(v)) for k, v in kw.items()]
        OcropusException.__init__(self, " ".join(parts))


class Warning(OcropusException):
    """OCRopus-specific warning exception.

    NOTE: this name shadows the builtin ``Warning`` in every module that
    imports it; it is kept unchanged for backward compatibility.
    """
    trace = 0


class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""
    trace = 0

    def __init__(self, s):
        OcropusException.__init__(self, s)


class BadImage(OcropusException):
    """Exception for an unusable image."""
    trace = 0

    def __init__(self, *args, **kw):
        # Keyword arguments are accepted and ignored, exactly as before;
        # only the positional arguments reach the base class.
        OcropusException.__init__(self, *args)


class BadInput(OcropusException):
    """Exception for unusable input."""
    trace = 0


class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing."""
    trace = 0

    def __init__(self, fname):
        # Initialise the base class so `args` is populated on every Python
        # version (needed for repr and for pickling across processes).
        OcropusException.__init__(self, fname)
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
import sl

# Pickle protocol number handed to pickle.dump() when components are saved.
pickle_mode = 2

def deprecated(f):
    """Decorator that announces, once, that `f` is deprecated.

    The first call of the decorated function prints
    "<f> has been DEPRECATED" to stdout; later calls are silent. Every
    call is forwarded to `f` unchanged and its result is returned.

    The previous version returned an inner factory instead of a wrapper
    (so decorated functions returned None) and assigned to `warned` from
    the nested function, which made it an unbound local.
    """
    import functools

    # Mutable cell instead of `nonlocal` so this also runs on Python 2.
    state = {"warned": False}

    @functools.wraps(f)
    def _wrapper(*args, **kw):
        if not state["warned"]:
            print("%s has been DEPRECATED" % (f,))
            state["warned"] = True
        return f(*args, **kw)

    return _wrapper



################################################################
# text normalization
Expand Down Expand Up @@ -466,10 +399,10 @@ def extractMasked(self,image,index,grow=0,bg=None,margin=0,dtype=None):
mh,mw = mask.shape
box = self.bbox(index)
r0,c0,r1,c1 = box
subimage = improc.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
subimage = sl.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
return where(mask,subimage,bg)



################################################################
### Object reading and writing
Expand Down Expand Up @@ -517,7 +450,7 @@ class names that have changed."""
unpickler.find_global = unpickle_find_global
return unpickler.load()



################################################################
### Simple record object.
Expand Down Expand Up @@ -581,16 +514,6 @@ def check_valid_class_label(s):
else:
raise BadClassLabel(s)

def summary(x):
    """Summarize a value as a short string (for display and debugging).

    - numpy arrays are shown as ``<ndarray SHAPE DTYPE>``;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by ``...``;
    - lists longer than 10 items are cut to their first 10 items and
      followed by ``...``;
    - anything else is returned as ``str(x)``.

    Previously the "..." marker was appended but the full string/list was
    still included, so long values were never actually shortened.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as one value.
        return '%s...' % (x[:10],)
    return str(x)

################################################################
### file name manipulation
################################################################
Expand Down Expand Up @@ -824,19 +747,6 @@ def quick_check_line_components(line_bin,dpi):
there is probably something wrong."""
return 1.0

def deprecated(func):
    """Decorator that marks a function as deprecated.

    Each call of the decorated function emits a DeprecationWarning
    ("Call to deprecated function NAME.") attributed to the caller
    (stacklevel=2) and then forwards all arguments to `func`, returning
    its result.

    functools.wraps replaces the hand-written copy of __name__, __doc__
    and __dict__, and additionally carries over __module__.
    """
    import functools

    @functools.wraps(func)
    def newFunc(*args, **kwargs):
        warnings.warn("Call to deprecated function %s." % func.__name__,
                      category=DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    return newFunc

################################################################
### conversion functions
################################################################
Expand Down Expand Up @@ -904,67 +814,6 @@ def obinfo(ob):
result += str(ob.shape)
return result

def save_component(file,object,verbose=0,verify=0):
    """Write `object` to the path `file`, choosing the format by object kind.

    Order of preference:
      1. an object with its own ``save_component`` method saves itself;
      2. a ``CommonComponent`` wrapper with a ``comp`` attribute, or an
         object whose type lives in module ``ocropus``, is written with
         ``ocropus.save_component`` (native format);
      3. everything else is pickled with protocol ``pickle_mode``.

    With `verbose`, a header and one line per instance attribute are
    printed before pickling. With `verify`, the pickle is loaded back
    once after writing (the loaded value is discarded).

    Note: the parameter names shadow the builtins ``file`` and ``object``;
    they are part of the public signature and therefore kept.
    """
    if hasattr(object, "save_component"):
        object.save_component(file)
        return

    # FIXME -- get rid of the CommonComponent special case eventually
    is_wrapper = (object.__class__.__name__ == "CommonComponent"
                  and hasattr(object, "comp"))
    if is_wrapper or type(object).__module__ == "ocropus":
        import ocropus
        native = object.comp if is_wrapper else object
        ocropus.save_component(file, native)
        return

    if verbose:
        print("[save_component]")
        for key, value in object.__dict__.items():
            print(":", key, obinfo(value))

    with open(file, "wb") as out:
        pickle.dump(object, out, pickle_mode)

    if not verify:
        return
    if verbose:
        print("[trying to read it again]")
    with open(file, "rb") as inp:
        pickle.load(inp)

def load_component(file):
    """Load a component described by the string `file`.

    - ``"=expr"``: the text after ``=`` is handed to ``pyconstruct``.
    - ``"@path"``: the leading ``@`` is dropped and ``path`` is loaded.
    - otherwise `file` is a path. Its first 128 characters decide the
      loader: old-style line recognizers and old-style character models
      (both starting with ``<object>``) go through ``ocropus``; any other
      content is passed to ``load_object``.
    """
    marker = file[0]
    if marker == "=":
        return pyconstruct(file[1:])
    if marker == "@":
        file = file[1:]

    with open(file, "r") as stream:
        header = stream.read(128)

    if not header.startswith("<object>"):
        return load_object(file)

    # FIXME -- get rid of both legacy formats below eventually
    if header.startswith("<object>\nlinerec\n"):
        warnings.warn("loading old-style linerec: %s"%file)
        result = RecognizeLine()
        import ocropus
        result.comp = ocropus.load_IRecognizeLine(file)
        return result

    warnings.warn("loading old-style cmodel: %s"%file)
    import ocroold
    result = ocroold.Model()
    import ocropus
    result.comp = ocropus.load_IModel(file)
    return result

def binarize_range(image,dtype='B',threshold=0.5):
"""Binarize an image by its range."""
Expand All @@ -973,39 +822,6 @@ def binarize_range(image,dtype='B',threshold=0.5):
if dtype=='B': scale = 255
return array(scale*(image>threshold),dtype=dtype)

def draw_pseg(pseg,axis=None):
    """Add a red, unfilled rectangle to `axis` for each region of `pseg`.

    Regions come from ``ocropy.RegionExtractor`` after ``setPageLines``;
    index 0 is skipped. The vertical coordinate is mirrored against
    ``pseg.dim(1)``. When `axis` is None, ``subplot(111)`` is used.
    """
    if axis is None:
        axis = subplot(111)
    height = pseg.dim(1)
    extractor = ocropy.RegionExtractor()
    extractor.setPageLines(pseg)
    for idx in range(1, extractor.length()):
        left = extractor.x0(idx)
        top = extractor.y0(idx)
        right = extractor.x1(idx)
        bottom = extractor.y1(idx)
        outline = patches.Rectangle((left, height - bottom - 1),
                                    right - left, bottom - top,
                                    edgecolor="red", fill=0)
        axis.add_patch(outline)

def draw_aligned(result,axis=None):
    """Placeholder for drawing an aligned recognition result.

    Not implemented: every call raises ``Unimplemented``. Both parameters
    are accepted only to keep the signature stable.

    The statements that used to follow the ``raise`` could never run and
    have been removed.
    """
    raise Unimplemented("FIXME draw_aligned")

def plotgrid(data,d=10,shape=(30,30)):
"""Plot a list of images on a grid."""
ion()
Expand All @@ -1024,13 +840,13 @@ def showrgb(r,g=None,b=None):
imshow(array([r,g,b]).transpose([1,2,0]))

def showgrid(l,cols=None,n=400,titles=None,xlabels=None,ylabels=None,**kw):
if "cmap" not in kw: kw["cmap"] = pylab.cm.gray
if "cmap" not in kw: kw["cmap"] = cm.gray
if "interpolation" not in kw: kw["interpolation"] = "nearest"
n = minimum(n,len(l))
if cols is None: cols = int(sqrt(n))
rows = (n+cols-1)//cols
for i in range(n):
pylab.xticks([]); pylab.yticks([])
pylab.xticks([]) ;pylab.yticks([])
pylab.subplot(rows,cols,i+1)
pylab.imshow(l[i],**kw)
if titles is not None: pylab.title(str(titles[i]))
Expand Down
73 changes: 73 additions & 0 deletions ocrolib/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import inspect
import numpy

def summary(x):
    """Summarize a value as a short string (for display and debugging).

    - numpy arrays are shown as ``<ndarray SHAPE DTYPE>``;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by ``...``;
    - lists longer than 10 items are cut to their first 10 items and
      followed by ``...``;
    - anything else is returned as ``str(x)``.

    Previously the "..." marker was appended but the full string/list was
    still included, so long values were never actually shortened.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as one value.
        return '%s...' % (x[:10],)
    return str(x)


################################################################
### Ocropy exceptions
################################################################

class OcropusException(Exception):
    """Root of the OCRopus exception hierarchy.

    Positional arguments are handled by the inherited Exception
    initializer (the former pass-through ``__init__`` added nothing).
    """
    # NOTE(review): `trace` is not read anywhere in the visible code;
    # presumably it tells a top-level handler whether a traceback should
    # be shown -- confirm against the code that consumes it.
    trace = 1


class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented."""
    trace = 1

    def __init__(self, s):
        # stack()[0] is this __init__; stack()[1] is the code that is
        # constructing the exception. context=0 skips source-line lookup.
        caller = inspect.stack(0)[1][3]
        # The message used to be dropped; keep the caller name first and
        # append the explanation that was passed in.
        OcropusException.__init__(self, "%s: %s" % (caller, s))


class Internal(OcropusException):
    """Exception raised for internal errors."""
    trace = 1

    def __init__(self, s):
        # Same layout as Unimplemented: "<calling function>: <message>".
        caller = inspect.stack(0)[1][3]
        OcropusException.__init__(self, "%s: %s" % (caller, s))


class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    Keyword arguments are kept in ``self.context`` and appended to the
    message as ``key=summary(value)`` pairs.
    """
    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts += ["%s=%s" % (k, summary(v)) for k, v in kw.items()]
        OcropusException.__init__(self, " ".join(parts))


class Warning(OcropusException):
    """OCRopus-specific warning exception.

    NOTE: this name shadows the builtin ``Warning`` in every module that
    imports it; it is kept unchanged for backward compatibility.
    """
    trace = 0


class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""
    trace = 0

    def __init__(self, s):
        OcropusException.__init__(self, s)


class BadImage(OcropusException):
    """Exception for an unusable image."""
    trace = 0

    def __init__(self, *args, **kw):
        # Keyword arguments are accepted and ignored, exactly as before;
        # only the positional arguments reach the base class.
        OcropusException.__init__(self, *args)


class BadInput(OcropusException):
    """Exception for unusable input."""
    trace = 0


class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing."""
    trace = 0

    def __init__(self, fname):
        # Initialise the base class so `args` is populated on every Python
        # version (needed for repr and for pickling across processes).
        OcropusException.__init__(self, fname)
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
Loading